PREPROCESS.PY

In [1]:
pip install dpkt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import dpkt
import random
import pickle

In [3]:
protocols = ['dns', 'smtp', 'ssh', 'ftp', 'http', 'https']
ports = [53, 25, 22, 21, 80, 443]


1. Importation des modules :
   - `import numpy as np` : Importe la bibliothèque NumPy sous l'alias `np`.
   - `import dpkt` : Importe le module `dpkt`, utilisé pour travailler avec des paquets de données réseau.
   - `import random` : Importe le module `random`, utilisé pour générer des nombres aléatoires.
   - `import pickle` : Importe le module `pickle`, utilisé pour la sérialisation et la désérialisation d'objets Python.

2. Définition des variables :
   - `protocols` : Une liste contenant les noms des protocoles réseau.
   - `ports` : Une liste contenant les numéros de port associés à chaque protocole.

3. Définition de la fonction `gen_flows(pcap)` :
   - Cette fonction prend un objet `pcap` comme argument, qui est supposé être un fichier de capture réseau au format pcap.
   - Elle initialise une liste `flows` qui contiendra les flux de données pour chaque protocole.
   - Elle parcourt ensuite chaque paquet dans le fichier pcap et vérifie le protocole et le port associés à chaque paquet.
   - Si un paquet correspond à un protocole et à un port spécifiés, il est ajouté au flux de données correspondant.
   - La fonction retourne une liste de flux de données, où chaque élément de la liste correspond à un protocole réseau, et chaque flux de données est un dictionnaire où les clés sont les adresses IP source et destination, ainsi que les ports, et les valeurs sont des listes de paquets IP correspondant à ce flux.

4. Dans le code principal (non inclus dans l'extrait que vous avez fourni), la fonction `gen_flows()` est probablement appelée pour traiter un fichier pcap et générer des flux de données en fonction des protocoles et des ports spécifiés.

Un objet `pcap` est une structure de données qui représente un fichier de capture réseau au format pcap. Pcap est un format de fichier standard utilisé pour enregistrer et stocker des données de trafic réseau capturées à partir d'une interface réseau ou d'un fichier de journalisation.

In [4]:
def gen_flows(pcap):
	flows = [{} for _ in range(len(protocols))]

	if pcap.datalink() != dpkt.pcap.DLT_EN10MB:
		print('unknow data link!')
		return

	xgr = 0
	for _, buff in pcap:
		eth = dpkt.ethernet.Ethernet(buff)
		xgr += 1
		if xgr % 500000 == 0:
			print('The %dth pkt!'%xgr)
			break

		if isinstance(eth.data, dpkt.ip.IP) and (
		isinstance(eth.data.data, dpkt.udp.UDP)
		or isinstance(eth.data.data, dpkt.tcp.TCP)):
			# tcp or udp packet
			ip = eth.data

			# loop all protocols
			for name in protocols:
				index = protocols.index(name)
				if ip.data.sport == ports[index] or \
				ip.data.dport == ports[index]:
					if len(flows[index]) >= 10000:
						# each class has at most 1w flows
						break
					# match a protocol
					key = '.'.join(map(str, map(int, ip.src))) + \
					'.' + '.'.join(map(str, map(int, ip.dst))) + \
					'.' + '.'.join(map(str, [ip.p, ip.data.sport, ip.data.dport]))

					if key not in flows[index]:
						flows[index][key] = [ip]
					elif len(flows[index][key]) < 1000:
						# each flow has at most 1k flows
						flows[index][key].append(ip)
					# after match a protocol quit
					break

	return flows


In [5]:
def closure(flows):
	flow_dict = {}
	for name in protocols:
		index = protocols.index(name)
		flow_dict[name] = flows[index]
		print('============================')
		print('Generate flows for %s'%name)
		print('Total flows: ', len(flows[index]))
		cnt = 0
		for k, v in flows[index].items():
			cnt += len(v)
		print('Total pkts: ', cnt)

	with open('pro_flows.pkl', 'wb') as f:
		pickle.dump(flow_dict, f)

In [7]:

"""from google.colab import drive
drive.mount("/content/gdrive")
pcap = dpkt.pcap.Reader(open('gdrive/MyDrive/202006101400.pcap', 'rb'))
flows = gen_flows(pcap)
closure(flows)"""

'from google.colab import drive\ndrive.mount("/content/gdrive")\npcap = dpkt.pcap.Reader(open(\'gdrive/MyDrive/202006101400.pcap\', \'rb\'))\nflows = gen_flows(pcap)\nclosure(flows)'

TOOL.PY

In [6]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [7]:
import pickle
import dpkt
import random
import numpy as np
#from preprocess import protocols
from tqdm import tqdm, trange

In [8]:
ip_features = {'hl':1,'tos':1,'len':2,'df':1,'mf':1,'ttl':1,'p':1}
tcp_features = {'off':1,'flags':1,'win':2}
udp_features = {'ulen':2}
max_byte_len = 50

In [9]:
def mask(p):
	p.src = b'\x00\x00\x00\x00'
	p.dst = b'\x00\x00\x00\x00'
	p.sum = 0
	p.id = 0
	p.offset = 0

	if isinstance(p.data, dpkt.tcp.TCP):
		p.data.sport = 0
		p.data.dport = 0
		p.data.seq = 0
		p.data.ack = 0
		p.data.sum = 0

	elif isinstance(p.data, dpkt.udp.UDP):
		p.data.sport = 0
		p.data.dport = 0
		p.data.sum = 0

	return p

In [10]:
def pkt2feature(data, k):
	flow_dict = {'train':{}, 'test':{}}

	# train->protocol->flowid->[pkts]
	for p in protocols:
		flow_dict['train'][p] = []
		flow_dict['test'][p] = []
		all_pkts = []
		p_keys = list(data[p].keys())

		for flow in p_keys:
			pkts = data[p][flow]
			all_pkts.extend(pkts)
		random.Random(1024).shuffle(all_pkts)

		for idx in range(len(all_pkts)):
			pkt = mask(all_pkts[idx])
			raw_byte = pkt.pack()

			byte = []
			pos = []
			for x in range(min(len(raw_byte),max_byte_len)):
				byte.append(int(raw_byte[x]))
				pos.append(x)

			byte.extend([0]*(max_byte_len-len(byte)))
			pos.extend([0]*(max_byte_len-len(pos)))
			# if len(byte) != max_byte_len or len(pos) != max_byte_len:
			# 	print(len(byte), len(pos))
			# 	input()
			if idx in range(k*int(len(all_pkts)*0.1), (k+1)*int(len(all_pkts)*0.1)):
				flow_dict['test'][p].append((byte, pos))
			else:
				flow_dict['train'][p].append((byte, pos))
	return flow_dict

In [11]:
def load_epoch_data(flow_dict, train='train'):
	flow_dict = flow_dict[train]
	x, y, label = [], [], []

	for p in protocols:
		pkts = flow_dict[p]
		for byte, pos in pkts:
			x.append(byte)
			y.append(pos)
			label.append(protocols.index(p))

	return np.array(x), np.array(y), np.array(label)[:, np.newaxis]

In [13]:
	'''with open('pro_flows.pkl','rb') as f:
		data = pickle.load(f)

	for i in trange(10, mininterval=2, \
		desc='  - (Building fold dataset)   ', leave=False):
		flow_dict = pkt2feature(data, i)
		with open('pro_flows_%d_noip_fold.pkl'%i, 'wb') as f:
			pickle.dump(flow_dict, f)'''

KeyboardInterrupt: 

SAM.PY

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import math

In [13]:
torch.manual_seed(2020)
torch.cuda.manual_seed_all(2020)
np.random.seed(2020)
random.seed(2020)
torch.backends.cudnn.deterministic = True


In [14]:
class SelfAttention(nn.Module):
	"""docstring for SelfAttention"""
	def __init__(self, d_dim=256, dropout=0.1):
		super(SelfAttention, self).__init__()
		# for query, key, value, output
		self.dim = d_dim
		self.linears = nn.ModuleList([nn.Linear(d_dim, d_dim) for _ in range(4)])
		self.dropout = nn.Dropout(p=dropout)

	def attention(self, query, key, value):
		scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.dim)
		scores = F.softmax(scores, dim=-1)
		return scores

	def forward(self, query, key, value):
		# 1) query, key, value
		query, key, value = \
		[l(x) for l, x in zip(self.linears, (query, key, value))]

		# 2) Apply attention
		scores = self.attention(query, key, value)
		x = torch.matmul(scores, value)

		# 3) apply the final linear
		x = self.linears[-1](x.contiguous())
		# sum keepdim=False
		return self.dropout(x), torch.mean(scores, dim=-2)

In [41]:
class OneDimCNN(nn.Module):
	"""docstring for OneDimCNN"""
	# https://blog.csdn.net/sunny_xsc1994/article/details/82969867
	def __init__(self, max_byte_len, d_dim=256, \
		kernel_size = [3, 4], filters=256, dropout=0.1):
		super(OneDimCNN, self).__init__()
		self.kernel_size = kernel_size
		self.convs = nn.ModuleList([
						nn.Sequential(nn.Conv1d(in_channels=d_dim,
												out_channels=filters,
												kernel_size=h),
						#nn.BatchNorm1d(num_features=config.feature_size),
						nn.ReLU(),
						# MaxPool1d:
						# stride – the stride of the window. Default value is kernel_size
						nn.MaxPool1d(kernel_size=max_byte_len-h+1))
						for h in self.kernel_size
						]
						)
		self.dropout = nn.Dropout(p=dropout)

	def forward(self, x):
		out = [conv(x.transpose(-2,-1)) for conv in self.convs]
		out = torch.cat(out, dim=1)
		out = out.view(-1, out.size(1))
		return self.dropout(out)


In [33]:
class SAM(nn.Module):
	"""docstring for SAM"""
	# total header bytes 24
	def __init__(self, num_class, max_byte_len, kernel_size = [3, 4], \
		d_dim=256, dropout=0.1, filters=256):
		super(SAM, self).__init__()
		self.posembedding = nn.Embedding(num_embeddings=max_byte_len,
								embedding_dim=d_dim)
		self.byteembedding = nn.Embedding(num_embeddings=300,
								embedding_dim=d_dim)
		self.attention = SelfAttention(d_dim, dropout)
		self.cnn = OneDimCNN(max_byte_len, d_dim, kernel_size, filters, dropout)
		self.fc = nn.Linear(in_features=256*len(kernel_size),
                            out_features=num_class)

	def forward(self, x, y):
		out = self.byteembedding(x) + self.posembedding(y)
		out, score = self.attention(out, out, out)
		out = self.cnn(out)
		out = self.fc(out)
		if not self.training:
			return F.softmax(out, dim=-1).max(1)[1], score
		return out

In [34]:
	x = np.random.randint(0, 255, (10, 20))
	y = np.random.randint(0, 20, (10, 20))
	sam = SAM(num_class=5, max_byte_len=20)
	out = sam(torch.from_numpy(x).long(), torch.from_numpy(y).long())
	print(out[0])

	sam.eval()
	out, score = sam(torch.from_numpy(x).long(), torch.from_numpy(y).long())
	print(out[0], score[0])

tensor([-0.2203, -0.0799, -0.0033, -0.0247, -0.1041],
       grad_fn=<SelectBackward0>)
tensor(2) tensor([0.0409, 0.0561, 0.0431, 0.0441, 0.0604, 0.0449, 0.0791, 0.0465, 0.0471,
        0.0453, 0.0514, 0.0312, 0.0553, 0.0426, 0.0486, 0.0581, 0.0389, 0.0523,
        0.0626, 0.0514], grad_fn=<SelectBackward0>)


TRAIN.PY

In [18]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [19]:
#import sklearn
import torch
import torch.nn.functional as F
import torch.utils.data
import torch.optim as optim
import torch.nn as nn
import argparse
import time
from tqdm import tqdm, trange
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
import pickle
import numpy as np

In [35]:
class Dataset(torch.utils.data.Dataset):
	"""docstring for Dataset"""
	def __init__(self, x, y, label):
		super(Dataset, self).__init__()
		self.x = x
		self.y = y
		self.label = label

	def __len__(self):
		return len(self.x)

	def __getitem__(self, idx):
		return self.x[idx], self.y[idx], self.label[idx]

In [36]:
def paired_collate_fn(insts):
	x, y, label = list(zip(*insts))
	return torch.LongTensor(x), torch.LongTensor(y), torch.LongTensor(label)

In [37]:
def cal_loss(pred, gold, cls_ratio=None):
	gold = gold.contiguous().view(-1)
	# By default, the losses are averaged over each loss element in the batch.
	loss = F.cross_entropy(pred, gold)

	# torch.max(a,0) 返回每一列中最大值的那个元素，且返回索引
	pred = F.softmax(pred, dim = -1).max(1)[1]
	# 相等位置输出1，否则0
	n_correct = pred.eq(gold)
	acc = n_correct.sum().item() / n_correct.shape[0]

	return loss, acc*100

In [38]:
def test_epoch(model, test_data):
	''' Epoch operation in training phase'''
	model.eval()

	total_acc = []
	total_pred = []
	total_score = []
	total_time = []
	# tqdm: 进度条库
	# desc ：进度条的描述
	# leave：把进度条的最终形态保留下来 bool
	# mininterval：最小进度更新间隔，以秒为单位
	for batch in tqdm(
		test_data, mininterval=2,
		desc='  - (Testing)   ', leave=False):

		# prepare data
		src_seq, src_seq2, gold = batch
		src_seq, src_seq2, gold = src_seq.cuda(), src_seq2.cuda(), gold.cuda()
		gold = gold.contiguous().view(-1)

		# forward
		torch.cuda.synchronize()
		start = time.time()
		pred, score = model(src_seq, src_seq2)
		torch.cuda.synchronize()
		end = time.time()
		# 相等位置输出1，否则0
		n_correct = pred.eq(gold)
		acc = n_correct.sum().item()*100 / n_correct.shape[0]
		total_acc.append(acc)
		total_pred.extend(pred.long().tolist())
		total_score.append(torch.mean(score, dim=0).tolist())
		total_time.append(end - start)

	return sum(total_acc)/len(total_acc), np.array(total_score).mean(axis=0), \
	total_pred, sum(total_time)/len(total_time)

Without cuda and Torch

In [57]:
import time
from tqdm import tqdm
import numpy as np

def test_epoch1(model, test_data):
    ''' Epoch operation in training phase'''
    model.eval()

    total_acc = []
    total_pred = []
    total_score = []
    total_time = []

    for batch in tqdm(test_data, mininterval=2, desc='  - (Testing)   ', leave=False):
        # prepare data
        src_seq, src_seq2, gold = batch
        # No need for CUDA operations here
        #src_seq, src_seq2, gold = src_seq.cuda(), src_seq2.cuda(), gold.cuda()
        src_seq, src_seq2, gold = src_seq, src_seq2, gold
        gold = gold.contiguous().view(-1)

        # forward
        start = time.time()
        pred, score = model(src_seq, src_seq2)
        end = time.time()

        # Calculate accuracy
        n_correct = pred.eq(gold)
        acc = n_correct.sum().item() * 100 / n_correct.shape[0]
        total_acc.append(acc)

        # Append predictions, scores, and time
        total_pred.extend(pred.long().tolist())
        total_score.append(np.mean(score.detach().cpu().numpy(), axis=0))
        total_time.append(end - start)

    # Calculate average accuracy, score, and time
    avg_acc = sum(total_acc) / len(total_acc)
    avg_score = np.mean(total_score, axis=0)
    avg_time = sum(total_time) / len(total_time)

    return avg_acc, avg_score, total_pred, avg_time


In [39]:
def train_epoch(model, training_data, optimizer):
	''' Epoch operation in training phase'''
	model.train()

	total_loss = []
	total_acc = []
	# tqdm: 进度条库
	# desc ：进度条的描述
	# leave：把进度条的最终形态保留下来 bool
	# mininterval：最小进度更新间隔，以秒为单位
	for batch in tqdm(
		training_data, mininterval=2,
		desc='  - (Training)   ', leave=False):

		# prepare data
		src_seq, src_seq2, gold = batch
		src_seq, src_seq2, gold = src_seq.cuda(), src_seq2.cuda(), gold.cuda()
		#src_seq, src_seq2, gold = src_seq, src_seq2, gold
		optimizer.zero_grad()
		# forward
		pred = model(src_seq, src_seq2)
		loss_per_batch, acc_per_batch = cal_loss(pred, gold)
		# update parameters
		loss_per_batch.backward()
		optimizer.step()

		# 只有一个元素，可以用item取而不管维度
		total_loss.append(loss_per_batch.item())
		total_acc.append(acc_per_batch)

	return sum(total_loss)/len(total_loss), sum(total_acc)/len(total_acc)

In [40]:
def main(i, flow_dict):
	f = open('results_%d.txt'%i, 'w')
	f.write('Train Loss Time Test\n')
	f.flush()

	model = SAM(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
	optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
	loss_list = []
	# default epoch is 3
	for epoch_i in trange(3, mininterval=2, \
		desc='  - (Training Epochs)   ', leave=False):

		train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
		training_data = torch.utils.data.DataLoader(
				Dataset(x=train_x, y=train_y, label=train_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=True
			)
		train_loss, train_acc = train_epoch(model, training_data, optimizer)

		test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
		test_data = torch.utils.data.DataLoader(
				Dataset(x=test_x, y=test_y, label=test_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=False
			)
		test_acc, score, pred, test_time = test_epoch(model, test_data)
		with open('atten_%d.txt'%i, 'w') as f2:
			f2.write(' '.join(map('{:.4f}'.format, score)))

		# write F1, PRECISION, RECALL
		with open('metric_%d.txt'%i, 'w') as f3:
			f3.write('F1 PRE REC\n')
			p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
			for a, b, c in zip(fscore, p, r):
				# for every cls
				f3.write('%.2f %.2f %.2f\n'%(a, b, c))
				f3.flush()
			if len(fscore) != len(protocols):
				a = set(pred)
				b = set(test_label[:,0])
				f3.write('%s\n%s'%(str(a), str(b)))

		# write Confusion Matrix
		with open('cm_%d.pkl'%i, 'wb') as f4:
			pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)


		# write ACC
		f.write('%.2f %.4f %.6f %.2f\n'%(train_acc, train_loss, test_time, test_acc))
		f.flush()


	f.close()


In [60]:
#pip uninstall torch torchvision torchaudio


^C
Note: you may need to restart the kernel to use updated packages.


In [23]:
print(torch.cuda.is_available())


True


In [None]:
#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://download.pytorch.org/whl/cu124
Note: you may need to restart the kernel to use updated packages.


In [None]:
#pip install torch torchvision -f https://download.pytorch.org/whl/cu124/torch_stable.html


Looking in links: https://download.pytorch.org/whl/cu124/torch_stable.html
Note: you may need to restart the kernel to use updated packages.


In [62]:
import torch

print(torch.__version__)
print(torch.version.cuda)



2.3.0.dev20240306+cu121
12.1


In [29]:

	for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		main(i, flow_dict)

==== 0  fold validation ====


  return torch.LongTensor(x), torch.LongTensor(y), torch.LongTensor(label)
                                                                        

--RESIDUAL LAYER--

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SAMRES(nn.Module):
    """docstring for ImprovedSAM"""
    # total header bytes 24
    def __init__(self, num_class, max_byte_len, kernel_size=[3, 4], \
        d_dim=256, dropout=0.1, filters=256):
        super(SAMRES, self).__init__()
        self.posembedding = nn.Embedding(num_embeddings=max_byte_len,
                                          embedding_dim=d_dim)
        self.byteembedding = nn.Embedding(num_embeddings=300,
                                          embedding_dim=d_dim)
        self.attention = SelfAttention(d_dim, dropout)
        self.cnn = OneDimCNN(max_byte_len, d_dim, kernel_size, filters, dropout)
        self.fc = nn.Linear(in_features=filters * len(kernel_size),
                            out_features=num_class)

        # Residual connection
        self.residual_conv = nn.Sequential(
            nn.Conv1d(in_channels=d_dim, out_channels=filters, kernel_size=1),
            nn.ReLU()
        )

    def forward(self, x, y):
        out = self.byteembedding(x) + self.posembedding(y)
        out, score = self.attention(out, out, out)
        out = self.residual_conv(out.transpose(1, 2)).transpose(1, 2) + out  # Residual connection
        out = self.cnn(out)
        out = self.fc(out)
        if not self.training:
            return F.softmax(out, dim=-1).max(1)[1], score
        return out

In [69]:
x = np.random.randint(0, 255, (10, 20))
y = np.random.randint(0, 20, (10, 20))
samres = SAMRES(num_class=5, max_byte_len=20)
out = samres(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0])

samres.eval()
out, score = samres(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0], score[0])

tensor([-0.0545,  0.0886,  0.1150,  0.0472,  0.0651],
       grad_fn=<SelectBackward0>)
tensor(4) tensor([0.0406, 0.0588, 0.0706, 0.0430, 0.0497, 0.0797, 0.0379, 0.0353, 0.0473,
        0.0526, 0.0559, 0.0450, 0.0485, 0.0442, 0.0515, 0.0631, 0.0381, 0.0417,
        0.0480, 0.0485], grad_fn=<SelectBackward0>)


In [34]:
def mainres(i, flow_dict):
	f = open('results_res_%d.txt'%i, 'w')
	f.write('Train Loss Time Test\n')
	f.flush()

	model = SAMRES(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
	optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
	loss_list = []
	# default epoch is 3
	for epoch_i in trange(3, mininterval=100, \
		desc='  - (Training Epochs)   ', leave=False):

		train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
		training_data = torch.utils.data.DataLoader(
				Dataset(x=train_x, y=train_y, label=train_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=True
			)
		train_loss, train_acc = train_epoch(model, training_data, optimizer)

		test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
		test_data = torch.utils.data.DataLoader(
				Dataset(x=test_x, y=test_y, label=test_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=False
			)
		test_acc, score, pred, test_time = test_epoch(model, test_data)
		with open('atten_res_%d.txt'%i, 'w') as f2:
			f2.write(' '.join(map('{:.4f}'.format, score)))

		# write F1, PRECISION, RECALL
		with open('metric_res_%d.txt'%i, 'w') as f3:
			f3.write('F1 PRE REC\n')
			p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
			for a, b, c in zip(fscore, p, r):
				# for every cls
				print("a:",a,"b:",b,"c:",c)
				f3.write('%.2f %.2f %.2f\n'%(a, b, c))
				f3.flush()
			if len(fscore) != len(protocols):
				a = set(pred)
				b = set(test_label[:,0])
				f3.write('%s\n%s'%(str(a), str(b)))

		# write Confusion Matrix
		with open('cm_res_%d.pkl'%i, 'wb') as f4:
			pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)


		# write ACC
		f.write('%.2f %.4f %.6f %.2f\n'%(train_acc, train_loss, test_time, test_acc))
		f.flush()

		# # early stop
		# if len(loss_list) == 5:
		# 	if abs(sum(loss_list)/len(loss_list) - train_loss) < 0.005:
		# 		break
		# 	loss_list[epoch_i%len(loss_list)] = train_loss
		# else:
		# 	loss_list.append(train_loss)

	f.close()


In [35]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainres(i, flow_dict)

==== 0  fold validation ====


  - (Training Epochs)   :  33%|███▎      | 1/3 [03:30<07:01, 210.78s/it]

a: 0.9713530189510797 b: 0.9914529914529915 c: 0.9520518358531318
a: 0.9741759726224783 b: 0.9736498649864986 c: 0.9747026491259686
a: 0.9709942917828223 b: 0.9734171740359983 c: 0.968583440924289
a: 0.9054928796005178 b: 0.8568428421421072 c: 0.96
a: 0.9327734671106985 b: 0.9192056537577009 c: 0.9467478128565995
a: 0.861522198731501 b: 0.8895187128120269 c: 0.8352342226189295


  - (Training Epochs)   :  67%|██████▋   | 2/3 [07:06<03:33, 213.48s/it]

a: 0.9666298342541436 b: 0.9895927601809955 c: 0.9447084233261339
a: 0.9805831826401447 b: 0.9839638904010162 c: 0.9772256262389619
a: 0.9731234149072824 b: 0.9682123545797149 c: 0.9780845499387559
a: 0.9221235281966536 b: 0.9742470536883457 c: 0.8752941176470588
a: 0.936992896340437 b: 0.9128973004887329 c: 0.962394965247761
a: 0.8819167735005717 b: 0.9264732646598478 c: 0.8414493041092268


                                                                        

a: 0.9641983544585279 b: 0.9935838680109991 c: 0.9365010799136069
a: 0.9785062521122001 b: 0.9786605975934022 c: 0.9783519553072626
a: 0.9725495402794835 b: 0.9751389489799315 c: 0.9699738471215281
a: 0.94492525570417 b: 0.9479084451460142 c: 0.9419607843137255
a: 0.9326827936811218 b: 0.9204350314825415 c: 0.9452609011376604
a: 0.8654595771249219 b: 0.8839101061629671 c: 0.8477635624318159




-- LSTM LAYER --

In [65]:
class SAMLSTM(nn.Module):
    """docstring for SAM"""
    # total header bytes 24
    def __init__(self, num_class, max_byte_len, kernel_size=[3, 4], \
        d_dim=256, dropout=0.1, filters=256, lstm_hidden_size=128, lstm_num_layers=1):
        super(SAMLSTM, self).__init__()
        self.posembedding = nn.Embedding(num_embeddings=max_byte_len,
                                embedding_dim=d_dim)
        self.byteembedding = nn.Embedding(num_embeddings=300,
                                embedding_dim=d_dim)
        self.attention = SelfAttention(d_dim, dropout)
        self.cnn = OneDimCNN(max_byte_len, d_dim, kernel_size, filters, dropout)
        self.fc = nn.Linear(in_features=lstm_hidden_size, out_features=num_class)
        self.lstm = nn.LSTM(input_size=filters * len(kernel_size),
                            hidden_size=lstm_hidden_size,
                            num_layers=lstm_num_layers,
                            batch_first=True)               

    def forward(self, x, y):
        out = self.byteembedding(x) + self.posembedding(y)
        out, score = self.attention(out, out, out)
        out = self.cnn(out)
    
    # Reshape out to a 2D tensor before passing to the linear layer
        out, _ = self.lstm(out.unsqueeze(1))  # Unsqueeze to add a dummy sequence dimension
        out = out[:, -1, :] 
    
        out = self.fc(out)
    
        if not self.training:
            return F.softmax(out, dim=-1).max(1)[1], score
        return out
        

In [66]:
x = np.random.randint(0, 255, (10, 20))
y = np.random.randint(0, 20, (10, 20))
samlstm = SAMLSTM(num_class=5, max_byte_len=20)
out = samlstm(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0])

samlstm.eval()
out, score = samlstm(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0], score[0])

tensor([ 0.0534,  0.0236,  0.0817,  0.0177, -0.0229],
       grad_fn=<SelectBackward0>)
tensor(2) tensor([0.0412, 0.0433, 0.0428, 0.0511, 0.0348, 0.0487, 0.0491, 0.0298, 0.0557,
        0.0755, 0.0650, 0.0631, 0.0496, 0.0546, 0.0550, 0.0525, 0.0505, 0.0432,
        0.0340, 0.0605], grad_fn=<SelectBackward0>)


In [67]:
def mainlstm(i, flow_dict):
	f = open('results_lstm_%d.txt'%i, 'w')
	f.write('Train Loss Time Test\n')
	f.flush()

	model = SAMLSTM(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
	optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
	loss_list = []
	# default epoch is 3
	for epoch_i in trange(3, mininterval=100, \
		desc='  - (Training Epochs)   ', leave=False):

		train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
		training_data = torch.utils.data.DataLoader(
				Dataset(x=train_x, y=train_y, label=train_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=True
			)
		train_loss, train_acc = train_epoch(model, training_data, optimizer)

		test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
		test_data = torch.utils.data.DataLoader(
				Dataset(x=test_x, y=test_y, label=test_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=False
			)
		test_acc, score, pred, test_time = test_epoch(model, test_data)
		with open('atten_lstm_%d.txt'%i, 'w') as f2:
			f2.write(' '.join(map('{:.4f}'.format, score)))

		# write F1, PRECISION, RECALL
		with open('metric_lstm_%d.txt'%i, 'w') as f3:
			f3.write('F1 PRE REC\n')
			p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
			for a, b, c in zip(fscore, p, r):
				# for every cls
				f3.write('%.2f %.2f %.2f\n'%(a, b, c))
				f3.flush()
			if len(fscore) != len(protocols):
				a = set(pred)
				b = set(test_label[:,0])
				f3.write('%s\n%s'%(str(a), str(b)))

		# write Confusion Matrix
		with open('cm_lstm_%d.pkl'%i, 'wb') as f4:
			pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)


		# write ACC
		f.write('%.2f %.4f %.6f %.2f\n'%(train_acc, train_loss, test_time, test_acc))
		f.flush()

		# # early stop
		# if len(loss_list) == 5:
		# 	if abs(sum(loss_list)/len(loss_list) - train_loss) < 0.005:
		# 		break
		# 	loss_list[epoch_i%len(loss_list)] = train_loss
		# else:
		# 	loss_list.append(train_loss)

	f.close()

In [68]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainlstm(i, flow_dict)

==== 0  fold validation ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                                        

-- BATCH NORMALIZATION --

In [78]:
class OneDimCNNBN(nn.Module):
	"""docstring for OneDimCNN"""
	# https://blog.csdn.net/sunny_xsc1994/article/details/82969867
	def __init__(self, max_byte_len, d_dim=256, \
		kernel_size = [3, 4], filters=256, dropout=0.1):
		super(OneDimCNNBN, self).__init__()
		self.kernel_size = kernel_size
		self.convs = nn.ModuleList([
						nn.Sequential(nn.Conv1d(in_channels=d_dim,
												out_channels=filters,
												kernel_size=h),
						nn.BatchNorm1d(num_features=filters),
						nn.ReLU(),
						# MaxPool1d:
						# stride – the stride of the window. Default value is kernel_size
						nn.MaxPool1d(kernel_size=max_byte_len-h+1))
						for h in self.kernel_size
						]
						)
		self.dropout = nn.Dropout(p=dropout)

	def forward(self, x):
		out = [conv(x.transpose(-2,-1)) for conv in self.convs]
		out = torch.cat(out, dim=1)
		out = out.view(-1, out.size(1))
		return self.dropout(out)


In [79]:
class SAMBN(nn.Module):
	"""docstring for SAM"""
	# total header bytes 24
	def __init__(self, num_class, max_byte_len, kernel_size = [3, 4], \
		d_dim=256, dropout=0.1, filters=256):
		super(SAMBN, self).__init__()
		self.posembedding = nn.Embedding(num_embeddings=max_byte_len,
								embedding_dim=d_dim)
		self.byteembedding = nn.Embedding(num_embeddings=300,
								embedding_dim=d_dim)
		self.attention = SelfAttention(d_dim, dropout)
		self.cnn = OneDimCNNBN(max_byte_len, d_dim, kernel_size, filters, dropout)
		self.fc = nn.Linear(in_features=256*len(kernel_size),
                            out_features=num_class)

	def forward(self, x, y):
		out = self.byteembedding(x) + self.posembedding(y)
		out, score = self.attention(out, out, out)
		out = self.cnn(out)
		out = self.fc(out)
		if not self.training:
			return F.softmax(out, dim=-1).max(1)[1], score
		return out

In [81]:
x = np.random.randint(0, 255, (10, 20))
y = np.random.randint(0, 20, (10, 20))
sambn = SAMBN(num_class=5, max_byte_len=20)
out = sambn(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0])

sambn.eval()
out, score = sambn(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0], score[0])

tensor([ 0.0344,  1.1171,  0.7604,  0.4999, -1.0242],
       grad_fn=<SelectBackward0>)
tensor(2) tensor([0.0385, 0.0438, 0.0543, 0.0395, 0.0533, 0.0460, 0.0581, 0.0584, 0.0376,
        0.0608, 0.0536, 0.0510, 0.0479, 0.0447, 0.0464, 0.0587, 0.0522, 0.0483,
        0.0643, 0.0427], grad_fn=<SelectBackward0>)


In [82]:
def mainbn(i, flow_dict):
	f = open('results_bn_%d.txt'%i, 'w')
	f.write('Train Loss Time Test\n')
	f.flush()

	model = SAMBN(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
	optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
	loss_list = []
	# default epoch is 3
	for epoch_i in trange(3, mininterval=100, \
		desc='  - (Training Epochs)   ', leave=False):

		train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
		training_data = torch.utils.data.DataLoader(
				Dataset(x=train_x, y=train_y, label=train_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=True
			)
		train_loss, train_acc = train_epoch(model, training_data, optimizer)

		test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
		test_data = torch.utils.data.DataLoader(
				Dataset(x=test_x, y=test_y, label=test_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=False
			)
		test_acc, score, pred, test_time = test_epoch(model, test_data)
		with open('atten_bn_%d.txt'%i, 'w') as f2:
			f2.write(' '.join(map('{:.4f}'.format, score)))

		# write F1, PRECISION, RECALL
		with open('metric_bn_%d.txt'%i, 'w') as f3:
			f3.write('F1 PRE REC\n')
			p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
			for a, b, c in zip(fscore, p, r):
				# for every cls
				f3.write('%.2f %.2f %.2f\n'%(a, b, c))
				f3.flush()
			if len(fscore) != len(protocols):
				a = set(pred)
				b = set(test_label[:,0])
				f3.write('%s\n%s'%(str(a), str(b)))

		# write Confusion Matrix
		with open('cm_bn_%d.pkl'%i, 'wb') as f4:
			pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)


		# write ACC
		f.write('%.2f %.4f %.6f %.2f\n'%(train_acc, train_loss, test_time, test_acc))
		f.flush()

		# # early stop
		# if len(loss_list) == 5:
		# 	if abs(sum(loss_list)/len(loss_list) - train_loss) < 0.005:
		# 		break
		# 	loss_list[epoch_i%len(loss_list)] = train_loss
		# else:
		# 	loss_list.append(train_loss)

	f.close()

In [84]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainbn(i, flow_dict)

==== 0  fold validation ====


                                                                        

-- SPATIAL DROPOUT --

In [47]:
class OneDimCNNSD(nn.Module):
	"""docstring for OneDimCNN"""
	# https://blog.csdn.net/sunny_xsc1994/article/details/82969867
	def __init__(self, max_byte_len, d_dim=256, \
		kernel_size = [3, 4], filters=256, dropout=0.1):
		super(OneDimCNNSD, self).__init__()
		self.kernel_size = kernel_size
		self.convs = nn.ModuleList([
						nn.Sequential(nn.Conv1d(in_channels=d_dim,
												out_channels=filters,
												kernel_size=h),
						#nn.BatchNorm1d(num_features=filters),
						nn.ReLU(),
						# MaxPool1d:
						# stride – the stride of the window. Default value is kernel_size
						nn.MaxPool1d(kernel_size=max_byte_len-h+1),
						nn.Dropout2d(p=dropout))
						for h in self.kernel_size
						]
						)
		self.dropout = nn.Dropout(p=dropout)

	def forward(self, x):
		out = [conv(x.transpose(-2,-1)) for conv in self.convs]
		out = torch.cat(out, dim=1)
		out = out.view(-1, out.size(1))
		return self.dropout(out)


In [48]:
class SAMSD(nn.Module):
	"""docstring for SAM"""
	# total header bytes 24
	def __init__(self, num_class, max_byte_len, kernel_size = [3, 4], \
		d_dim=256, dropout=0.1, filters=256):
		super(SAMSD, self).__init__()
		self.posembedding = nn.Embedding(num_embeddings=max_byte_len,
								embedding_dim=d_dim)
		self.byteembedding = nn.Embedding(num_embeddings=300,
								embedding_dim=d_dim)
		self.attention = SelfAttention(d_dim, dropout)
		self.cnn = OneDimCNNSD(max_byte_len, d_dim, kernel_size, filters, dropout)
		self.fc = nn.Linear(in_features=256*len(kernel_size),
                            out_features=num_class)

	def forward(self, x, y):
		out = self.byteembedding(x) + self.posembedding(y)
		out, score = self.attention(out, out, out)
		out = self.cnn(out)
		out = self.fc(out)
		if not self.training:
			return F.softmax(out, dim=-1).max(1)[1], score
		return out

In [49]:
x = np.random.randint(0, 255, (10, 20))
y = np.random.randint(0, 20, (10, 20))
samsd = SAMSD(num_class=5, max_byte_len=20)
out = samsd(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0])

samsd.eval()
out, score = samsd(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(out[0], score[0])

tensor([ 0.0589, -0.0442,  0.0066, -0.0018,  0.0818],
       grad_fn=<SelectBackward0>)
tensor(0) tensor([0.0494, 0.0620, 0.0536, 0.0470, 0.0479, 0.0441, 0.0498, 0.0329, 0.0638,
        0.0642, 0.0503, 0.0438, 0.0479, 0.0566, 0.0422, 0.0356, 0.0626, 0.0464,
        0.0414, 0.0584], grad_fn=<SelectBackward0>)


In [50]:
def mainsd(i, flow_dict):
	f = open('results_sd_%d.txt'%i, 'w')
	f.write('Train Loss Time Test\n')
	f.flush()

	model = SAMSD(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
	optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
	loss_list = []
	# default epoch is 3
	for epoch_i in trange(3, mininterval=100, \
		desc='  - (Training Epochs)   ', leave=False):

		train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
		training_data = torch.utils.data.DataLoader(
				Dataset(x=train_x, y=train_y, label=train_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=True
			)
		train_loss, train_acc = train_epoch(model, training_data, optimizer)

		test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
		test_data = torch.utils.data.DataLoader(
				Dataset(x=test_x, y=test_y, label=test_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=False
			)
		test_acc, score, pred, test_time = test_epoch(model, test_data)
		with open('atten_sd_%d.txt'%i, 'w') as f2:
			f2.write(' '.join(map('{:.4f}'.format, score)))

		# write F1, PRECISION, RECALL
		with open('metric_sd_%d.txt'%i, 'w') as f3:
			f3.write('F1 PRE REC\n')
			p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
			for a, b, c in zip(fscore, p, r):
				# for every cls
				f3.write('%.2f %.2f %.2f\n'%(a, b, c))
				f3.flush()
			if len(fscore) != len(protocols):
				a = set(pred)
				b = set(test_label[:,0])
				f3.write('%s\n%s'%(str(a), str(b)))

		# write Confusion Matrix
		with open('cm_sd_%d.pkl'%i, 'wb') as f4:
			pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)


		# write ACC
		f.write('%.2f %.4f %.6f %.2f\n'%(train_acc, train_loss, test_time, test_acc))
		f.flush()

		# # early stop
		# if len(loss_list) == 5:
		# 	if abs(sum(loss_list)/len(loss_list) - train_loss) < 0.005:
		# 		break
		# 	loss_list[epoch_i%len(loss_list)] = train_loss
		# else:
		# 	loss_list.append(train_loss)

	f.close()

In [51]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainsd(i, flow_dict)

==== 0  fold validation ====


  return torch.LongTensor(x), torch.LongTensor(y), torch.LongTensor(label)
                                                                        

In [48]:
print(torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))

cuda:0


multi head attention

In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math

class OneDimCNN(nn.Module):
    def __init__(self, max_byte_len, d_dim=256, kernel_size=[3, 4], filters=256, dropout=0.1):
        super(OneDimCNN, self).__init__()
        self.kernel_size = kernel_size
        self.convs = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(in_channels=d_dim, out_channels=filters, kernel_size=h),
                nn.ReLU(),
                nn.MaxPool1d(kernel_size=max_byte_len - h + 1)
            ) for h in self.kernel_size
        ])
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        out = [conv(x.transpose(-2, -1)) for conv in self.convs]
        out = torch.cat(out, dim=1)
        out = out.view(-1, out.size(1))
        return self.dropout(out)

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, nhead, dropout=0.1):
        super(MultiHeadAttention, self).__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.head_dim = d_model // nhead

        self.query_linear = nn.Linear(d_model, d_model)
        self.key_linear = nn.Linear(d_model, d_model)
        self.value_linear = nn.Linear(d_model, d_model)
        self.out_linear = nn.Linear(d_model, d_model)

        self.dropout = nn.Dropout(p=dropout)

    def attention(self, query, key, value):
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.head_dim)
        scores = F.softmax(scores, dim=-1)
        return scores

    def forward(self, query, key, value):
        query = self.query_linear(query)
        key = self.key_linear(key)
        value = self.value_linear(value)

        query, key, value = [self.split_heads(x) for x in (query, key, value)]

        scores = self.attention(query, key, value)
        x = torch.matmul(scores, value)

        x = self.combine_heads(x)
        x = self.out_linear(x)

        return self.dropout(x), torch.mean(scores, dim=-2)

    def split_heads(self, x):
        batch_size, seq_len, features = x.size()
        x = x.view(batch_size, seq_len, self.nhead, self.head_dim)
        return x.permute(0, 2, 1, 3).contiguous().view(batch_size * self.nhead, seq_len, self.head_dim)

    def combine_heads(self, x):
        batch_size, seq_len, _ = x.size()
        x = x.view(batch_size // self.nhead, self.nhead, seq_len, -1)
        x = x.permute(0, 2, 1, 3).contiguous().view(batch_size // self.nhead, seq_len, -1)
        return x

class MHSAM(nn.Module):
    def __init__(self, num_class, max_byte_len, kernel_size=[3, 4], d_dim=256, dropout=0.1, filters=256, nhead=4):
        super(MHSAM, self).__init__()
        self.posembedding = nn.Embedding(num_embeddings=max_byte_len, embedding_dim=d_dim)
        self.byteembedding = nn.Embedding(num_embeddings=300, embedding_dim=d_dim)
        self.attention = MultiHeadAttention(d_dim, nhead, dropout)
        self.cnn = OneDimCNN(max_byte_len, d_dim, kernel_size, filters, dropout)
        self.fc = nn.Linear(in_features=filters * len(kernel_size), out_features=num_class)

    def forward(self, x, y):
        out = self.byteembedding(x) + self.posembedding(y)
        out, _ = self.attention(out, out, out)
        out = self.cnn(out)
        out = self.fc(out)
        if not self.training:
            return F.softmax(out, dim=-1).max(1)[1]
        return out

# Example usage:
x = np.random.randint(0, 255, (10, 20))
y = np.random.randint(0, 20, (10, 20))
mhsam = MHSAM(num_class=5, max_byte_len=20)
output = mhsam(torch.from_numpy(x).long(), torch.from_numpy(y).long())
print(output)


tensor([[-0.2084, -0.0758,  0.0391, -0.0643, -0.1008],
        [-0.2907, -0.0412,  0.0855, -0.0513, -0.0134],
        [-0.2040,  0.0444,  0.0456, -0.0121, -0.0880],
        [-0.2368,  0.0116, -0.0561, -0.0048, -0.0496],
        [-0.1875,  0.0198,  0.0541, -0.1247, -0.0469],
        [-0.2301,  0.1281, -0.0541, -0.0699,  0.0156],
        [-0.2305, -0.0129,  0.0129, -0.0475, -0.0241],
        [-0.2025, -0.0035,  0.0553, -0.0471,  0.0039],
        [-0.2269, -0.0351,  0.0587, -0.0241, -0.1163],
        [-0.2030,  0.0207,  0.1049, -0.0080, -0.1280]],
       grad_fn=<AddmmBackward0>)


In [27]:
def mainmhsam(i, flow_dict):
	f = open('results_parallel%d.txt'%i, 'w')
	f.write('Train Loss Time Test\n')
	f.flush()

	model = MHSAM(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
	optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
	loss_list = []
	# default epoch is 3
	for epoch_i in trange(3, mininterval=100, \
		desc='  - (Training Epochs)   ', leave=False):

		train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
		training_data = torch.utils.data.DataLoader(
				Dataset(x=train_x, y=train_y, label=train_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=True
			)
		train_loss, train_acc = train_epoch(model, training_data, optimizer)

		test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
		test_data = torch.utils.data.DataLoader(
				Dataset(x=test_x, y=test_y, label=test_label),
				num_workers=0,
				collate_fn=paired_collate_fn,
				batch_size=128,
				shuffle=False
			)
		for batch in tqdm(test_data, mininterval=2, desc='  - (Testing)   ', leave=False):
   			 test_acc, score, pred, test_time = test_epoch(model, batch)
		with open('atten_mhsam%d.txt'%i, 'w') as f2:
			f2.write(' '.join(map('{:.4f}'.format, score)))

		# write F1, PRECISION, RECALL
		with open('metric_mhsam%d.txt'%i, 'w') as f3:
			f3.write('F1 PRE REC\n')
			p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
			for a, b, c in zip(fscore, p, r):
				# for every cls
				f3.write('%.2f %.2f %.2f\n'%(a, b, c))
				f3.flush()
			if len(fscore) != len(protocols):
				a = set(pred)
				b = set(test_label[:,0])
				f3.write('%s\n%s'%(str(a), str(b)))

		# write Confusion Matrix
		with open('cm_mhsam%d.pkl'%i, 'wb') as f4:
			pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)


		# write ACC
		f.write('%.2f %.4f %.6f %.2f\n'%(train_acc, train_loss, test_time, test_acc))
		f.flush()

			# # early stop
		# if len(loss_list) == 5:
		# 	if abs(sum(loss_list)/len(loss_list) - train_loss) < 0.005:
		# 		break
		# 	loss_list[epoch_i%len(loss_list)] = train_loss
		# else:
		# 	loss_list.append(train_loss)

	f.close()


In [29]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainmhsam(i, flow_dict)

==== 0  fold validation ====


  - (Training Epochs)   :   0%|          | 0/3 [00:00<?, ?it/s]
[A
                                                               

ValueError: too many values to unpack (expected 3)

In [30]:
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, nhead, dropout=0.1):
        super(MultiHeadAttention, self).__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.head_dim = d_model // nhead

        self.query_linear = nn.Linear(d_model, d_model)
        self.key_linear = nn.Linear(d_model, d_model)
        self.value_linear = nn.Linear(d_model, d_model)
        self.out_linear = nn.Linear(d_model, d_model)

        self.dropout = nn.Dropout(p=dropout)

    def attention(self, query, key, value):
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.head_dim)
        scores = F.softmax(scores, dim=-1)
        return scores

    def forward(self, query, key, value):
        query = self.query_linear(query)
        key = self.key_linear(key)
        value = self.value_linear(value)

        query, key, value = [self.split_heads(x) for x in (query, key, value)]

        scores = self.attention(query, key, value)
        x = torch.matmul(scores, value)

        x = self.combine_heads(x)
        x = self.out_linear(x)

        return self.dropout(x), torch.mean(scores, dim=-2)

    def split_heads(self, x):
        batch_size, seq_len, features = x.size()
        x = x.view(batch_size, seq_len, self.nhead, self.head_dim)
        return x.permute(0, 2, 1, 3).contiguous().view(batch_size * self.nhead, seq_len, self.head_dim)

    def combine_heads(self, x):
        batch_size, seq_len, _ = x.size()
        x = x.view(batch_size // self.nhead, self.nhead, seq_len, -1)
        x = x.permute(0, 2, 1, 3).contiguous().view(batch_size // self.nhead, seq_len, -1)
        return x


class OneDimCNN(nn.Module):
    def __init__(self, max_byte_len, d_dim=256, kernel_size=[3, 4], filters=256, dropout=0.1):
        super(OneDimCNN, self).__init__()
        self.kernel_size = kernel_size
        self.convs = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(in_channels=d_dim, out_channels=filters, kernel_size=h),
                nn.ReLU(),
                nn.MaxPool1d(kernel_size=max_byte_len - h + 1)
            ) for h in self.kernel_size
        ])
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        out = [conv(x.transpose(-2, -1)) for conv in self.convs]
        out = torch.cat(out, dim=1)
        out = out.view(-1, out.size(1))
        return self.dropout(out)


class MHSAM(nn.Module):
    def __init__(self, num_class, max_byte_len, kernel_size=[3, 4], d_dim=256, dropout=0.1, filters=256, nhead=4):
        super(MHSAM, self).__init__()
        self.posembedding = nn.Embedding(num_embeddings=max_byte_len, embedding_dim=d_dim)
        self.byteembedding = nn.Embedding(num_embeddings=300, embedding_dim=d_dim)
        self.attention = MultiHeadAttention(d_dim, nhead, dropout)
        self.cnn = OneDimCNN(max_byte_len, d_dim, kernel_size, filters, dropout)
        self.fc = nn.Linear(in_features=filters * len(kernel_size), out_features=num_class)

    def forward(self, x, y):
        out = self.byteembedding(x) + self.posembedding(y)
        out, _ = self.attention(out, out, out)
        out = self.cnn(out)
        out = self.fc(out)
        if not self.training:
            return F.softmax(out, dim=-1).max(1)[1]
        return out

# Exemple d'utilisation:
x = torch.randint(0, 255, (10, 20))
y = torch.randint(0, 20, (10, 20))
mhsam = MHSAM(num_class=5, max_byte_len=20)
output = mhsam(x, y)
print(output)


tensor([[ 1.2640e-02,  1.1647e-01, -1.3449e-01,  1.8673e-02, -2.8180e-02],
        [ 7.5116e-02,  4.2536e-02, -1.8551e-01,  7.0953e-03, -4.9415e-02],
        [ 9.9890e-02, -2.3339e-02, -9.2603e-02,  2.4429e-02,  1.0407e-01],
        [ 3.3617e-02,  4.9561e-02, -9.4847e-02,  2.8700e-03,  6.8238e-02],
        [ 5.4745e-02, -4.2953e-03, -1.1213e-01, -1.5701e-02,  9.8361e-04],
        [ 1.8611e-02,  1.1650e-04, -1.5371e-01,  5.9058e-02, -3.5684e-02],
        [-4.3111e-02, -2.0026e-02, -1.4689e-01,  3.2048e-02,  6.0493e-02],
        [ 7.1340e-02,  8.3140e-02, -1.8492e-01,  7.7579e-02, -3.5308e-02],
        [ 1.0083e-02,  9.6251e-03, -4.4236e-02, -3.0073e-02,  8.6676e-02],
        [ 7.7682e-02,  4.9190e-02, -1.2023e-01,  5.4921e-03, -3.9368e-02]],
       grad_fn=<AddmmBackward0>)


In [31]:
def mainmhsam(i, flow_dict):
    f = open('results_%d.txt' % i, 'w')
    f.write('Train Loss Time Test\n')
    f.flush()

    model = MHSAM(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
    optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
    loss_list = []

    # default epoch is 3
    for epoch_i in trange(3, mininterval=2, desc='  - (Training Epochs)   ', leave=False):
        train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
        training_data = torch.utils.data.DataLoader(
            Dataset(x=train_x, y=train_y, label=train_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=True
        )
        train_loss, train_acc = train_epoch(model, training_data, optimizer)

        test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
        test_data = torch.utils.data.DataLoader(
            Dataset(x=test_x, y=test_y, label=test_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=False
        )
        test_acc, score, pred, test_time = test_epoch(model, test_data)
        with open('atten_%d.txt' % i, 'w') as f2:
            f2.write(' '.join(map('{:.4f}'.format, score)))

        # write F1, PRECISION, RECALL
        with open('metric_%d.txt' % i, 'w') as f3:
            f3.write('F1 PRE REC\n')
            p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
            for a, b, c in zip(fscore, p, r):
                # for every cls
                f3.write('%.2f %.2f %.2f\n' % (a, b, c))
                f3.flush()
            if len(fscore) != len(protocols):
                a = set(pred)
                b = set(test_label[:, 0])
                f3.write('%s\n%s' % (str(a), str(b)))

        # write Confusion Matrix
        with open('cm_%d.pkl' % i, 'wb') as f4:
            pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)

        # write ACC
        f.write('%.2f %.4f %.6f %.2f\n' % (train_acc, train_loss, test_time, test_acc))
        f.flush()

    f.close()


In [32]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainmhsam(i, flow_dict)

==== 0  fold validation ====


                                                               

ValueError: too many values to unpack (expected 2)

kernel size, dropout and optimzer RMSprop

In [56]:
class OneDimCNN(nn.Module):
	"""docstring for OneDimCNN"""
	# https://blog.csdn.net/sunny_xsc1994/article/details/82969867
	def __init__(self, max_byte_len, d_dim=256, \
		kernel_size = [3, 4], filters=256, dropout=0.09):
		super(OneDimCNN, self).__init__()
		self.kernel_size = kernel_size
		self.convs = nn.ModuleList([
						nn.Sequential(nn.Conv1d(in_channels=d_dim,
												out_channels=filters,
												kernel_size=h),
						#nn.BatchNorm1d(num_features=config.feature_size),
						nn.ReLU(),
						# MaxPool1d:
						# stride – the stride of the window. Default value is kernel_size
						nn.MaxPool1d(kernel_size=max_byte_len-h+1))
						for h in self.kernel_size
						]
						)
		self.dropout = nn.Dropout(p=dropout)

	def forward(self, x):
		out = [conv(x.transpose(-2,-1)) for conv in self.convs]
		out = torch.cat(out, dim=1)
		out = out.view(-1, out.size(1))
		return self.dropout(out)


In [57]:
class SAMFiltre(nn.Module):
    def __init__(self, num_class, max_byte_len, kernel_size=[3, 4], d_dim=256, dropout=0.09, filters=256):
        super(SAMFiltre, self).__init__()
        self.posembedding = nn.Embedding(num_embeddings=max_byte_len, embedding_dim=d_dim)
        self.byteembedding = nn.Embedding(num_embeddings=300, embedding_dim=d_dim)
        self.attention = SelfAttention(d_dim, dropout)
        self.cnn = OneDimCNN(max_byte_len, d_dim, kernel_size, filters, dropout)
        self.fc = nn.Linear(in_features=filters * len(kernel_size), out_features=num_class)

    def forward(self, x, y):
        out = self.byteembedding(x) + self.posembedding(y)
        out, score = self.attention(out, out, out)
        out = self.cnn(out)
        out = self.fc(out)
        if not self.training:
            return F.softmax(out, dim=-1).max(1)[1], score
        return out


In [58]:
def mainfiltre(i, flow_dict):
    f = open('results_kernel%d.txt' % i, 'w')
    f.write('Train Loss Time Test\n')
    f.flush()

    model = SAMFiltre(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
    optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()))
    loss_list = []

    # default epoch is 3
    for epoch_i in trange(3, mininterval=2, desc='  - (Training Epochs)   ', leave=False):
        train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
        training_data = torch.utils.data.DataLoader(
            Dataset(x=train_x, y=train_y, label=train_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=True
        )
        train_loss, train_acc = train_epoch(model, training_data, optimizer)

        test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
        test_data = torch.utils.data.DataLoader(
            Dataset(x=test_x, y=test_y, label=test_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=False
        )
        test_acc, score, pred, test_time = test_epoch(model, test_data)
        with open('atten_%dkernel.txt' % i, 'w') as f2:
            f2.write(' '.join(map('{:.4f}'.format, score)))

        # write F1, PRECISION, RECALL
        with open('metric_%dkernel.txt' % i, 'w') as f3:
            f3.write('F1 PRE REC\n')
            p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
            for a, b, c in zip(fscore, p, r):
                # for every cls
                f3.write('%.2f %.2f %.2f\n' % (a, b, c))
                f3.flush()
            if len(fscore) != len(protocols):
                a = set(pred)
                b = set(test_label[:, 0])
                f3.write('%s\n%s' % (str(a), str(b)))

        # write Confusion Matrix
        with open('cm_%dkernel.pkl' % i, 'wb') as f4:
            pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)

        # write ACC
        f.write('%.2f %.4f %.6f %.2f\n' % (train_acc, train_loss, test_time, test_acc))
        f.flush()

    f.close()


In [59]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainfiltre(i, flow_dict)

==== 0  fold validation ====


                                                                        

optimizer SGD

In [24]:
def mainSGD(i, flow_dict):
    f = open('results_SGD%d.txt' % i, 'w')
    f.write('Train Loss Time Test\n')
    f.flush()

    model = SAM(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
    optimizer = optim.SGD(filter(lambda x: x.requires_grad, model.parameters()))
    loss_list = []

    # default epoch is 3
    for epoch_i in trange(5, mininterval=2, desc='  - (Training Epochs)   ', leave=False):
        train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
        training_data = torch.utils.data.DataLoader(
            Dataset(x=train_x, y=train_y, label=train_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=True
        )
        train_loss, train_acc = train_epoch(model, training_data, optimizer)

        test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
        test_data = torch.utils.data.DataLoader(
            Dataset(x=test_x, y=test_y, label=test_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=False
        )
        test_acc, score, pred, test_time = test_epoch(model, test_data)
        with open('atten_%dSGD.txt' % i, 'w') as f2:
            f2.write(' '.join(map('{:.4f}'.format, score)))

        # write F1, PRECISION, RECALL
        with open('metric_%dSGD.txt' % i, 'w') as f3:
            f3.write('F1 PRE REC\n')
            p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
            for a, b, c in zip(fscore, p, r):
                # for every cls
                f3.write('%.2f %.2f %.2f\n' % (a, b, c))
                f3.flush()
            if len(fscore) != len(protocols):
                a = set(pred)
                b = set(test_label[:, 0])
                f3.write('%s\n%s' % (str(a), str(b)))

        # write Confusion Matrix
        with open('cm_%dSGD.pkl' % i, 'wb') as f4:
            pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)

        # write ACC
        f.write('%.2f %.4f %.6f %.2f\n' % (train_acc, train_loss, test_time, test_acc))
        f.flush()

    f.close()



In [25]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainSGD(i, flow_dict)

==== 0  fold validation ====


  return torch.LongTensor(x), torch.LongTensor(y), torch.LongTensor(label)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                                        

AdamW 

In [27]:
def mainAdamW(i, flow_dict):
    f = open('results_AdamW%d.txt' % i, 'w')
    f.write('Train Loss Time Test\n')
    f.flush()

    model = SAM(num_class=len(protocols), max_byte_len=max_byte_len).cuda()
    optimizer = optim.AdamW(filter(lambda x: x.requires_grad, model.parameters()))
    loss_list = []

    # default epoch is 3
    for epoch_i in trange(3, mininterval=2, desc='  - (Training Epochs)   ', leave=False):
        train_x, train_y, train_label = load_epoch_data(flow_dict, 'train')
        training_data = torch.utils.data.DataLoader(
            Dataset(x=train_x, y=train_y, label=train_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=True
        )
        train_loss, train_acc = train_epoch(model, training_data, optimizer)

        test_x, test_y, test_label = load_epoch_data(flow_dict, 'test')
        test_data = torch.utils.data.DataLoader(
            Dataset(x=test_x, y=test_y, label=test_label),
            num_workers=0,
            collate_fn=paired_collate_fn,
            batch_size=128,
            shuffle=False
        )
        test_acc, score, pred, test_time = test_epoch(model, test_data)
        with open('atten_%dAdamW.txt' % i, 'w') as f2:
            f2.write(' '.join(map('{:.4f}'.format, score)))

        # write F1, PRECISION, RECALL
        with open('metric_%dAdamW.txt' % i, 'w') as f3:
            f3.write('F1 PRE REC\n')
            p, r, fscore, _ = precision_recall_fscore_support(test_label, pred)
            for a, b, c in zip(fscore, p, r):
                # for every cls
                f3.write('%.2f %.2f %.2f\n' % (a, b, c))
                f3.flush()
            if len(fscore) != len(protocols):
                a = set(pred)
                b = set(test_label[:, 0])
                f3.write('%s\n%s' % (str(a), str(b)))

        # write Confusion Matrix
        with open('cm_%dAdamW.pkl' % i, 'wb') as f4:
            pickle.dump(confusion_matrix(test_label, pred, normalize='true'), f4)

        # write ACC
        f.write('%.2f %.4f %.6f %.2f\n' % (train_acc, train_loss, test_time, test_acc))
        f.flush()

    f.close()



In [28]:
for i in range(1):
		with open('pro_flows_%d_noip_fold.pkl'%i, 'rb') as f:
			flow_dict = pickle.load(f)
		print('====', i, ' fold validation ====')
		mainAdamW(i, flow_dict)

==== 0  fold validation ====


                                                                        