In [8]:
import pyspark
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession, Row
from pyspark.sql import Row
from pyspark.sql.types import StructField
from pyspark.sql.types import StructType
from pyspark.sql.types import StringType
from pyspark.sql.functions import split
from pyspark.sql.functions import lit, concat_ws
from pyspark.sql import SQLContext
from pyspark.sql.functions import when
from pyspark.sql.functions import col
import os
from pyspark.sql.functions import concat_ws,col
import pyspark.sql.functions as f
import tensorflow as tf

In [9]:
from pyspark.sql import SparkSession

In [10]:
spark = SparkSession.builder.appName('Data').getOrCreate()

In [11]:
spark

In [12]:
df1 = spark.read.csv("TLS_03-02-2022_30mintrace-TCP-only-sni.csv")
df1.printSchema()
print(df1.count())

root
 |-- _c0: string (nullable = true)

144


In [13]:
df1 = df1.withColumn('SourceIP', split(df1['_c0'], '\t').getItem(0)) \
        .withColumn('SourcePort', split(df1['_c0'], '\t').getItem(1)) \
        .withColumn('DestinationIP', split(df1['_c0'], '\t').getItem(2)) \
        .withColumn('DestinationPort', split(df1['_c0'], '\t').getItem(3)) \
        .withColumn('Protocol', split(df1['_c0'], '\t').getItem(4)) \
        .withColumn('ServerNameIndication', split(df1['_c0'], '\t').getItem(5)) 
df1.show(truncate = False)
df1.printSchema()

    

+--------------------------------------------------------------------------------+-------------+----------+---------------+---------------+--------+-----------------------------------+
|_c0                                                                             |SourceIP     |SourcePort|DestinationIP  |DestinationPort|Protocol|ServerNameIndication               |
+--------------------------------------------------------------------------------+-------------+----------+---------------+---------------+--------+-----------------------------------+
|169.231.79.75\t52670\t31.13.70.14\t443\t6\tvideo-lax3-1.xx.fbcdn.net            |169.231.79.75|52670     |31.13.70.14    |443            |6       |video-lax3-1.xx.fbcdn.net          |
|169.231.79.75\t52671\t31.13.70.23\t443\t6\tgateway.facebook.com                 |169.231.79.75|52671     |31.13.70.23    |443            |6       |gateway.facebook.com               |
|169.231.79.75\t52672\t157.240.11.18\t443\t6\tvideo-lax3-2.xx.fbcdn.net    

In [14]:
df1 = df1.drop("_c0")
df1=df1.select(concat_ws('-',df1.SourceIP,df1.DestinationIP,df1.SourcePort,df1.DestinationPort,df1.Protocol).alias("FlowID"), "SourceIP", "SourcePort", "DestinationIP", "DestinationPort","ServerNameIndication")
df1.show(truncate=False)
df1.printSchema()

+-----------------------------------------+-------------+----------+---------------+---------------+-----------------------------------+
|FlowID                                   |SourceIP     |SourcePort|DestinationIP  |DestinationPort|ServerNameIndication               |
+-----------------------------------------+-------------+----------+---------------+---------------+-----------------------------------+
|169.231.79.75-31.13.70.14-52670-443-6    |169.231.79.75|52670     |31.13.70.14    |443            |video-lax3-1.xx.fbcdn.net          |
|169.231.79.75-31.13.70.23-52671-443-6    |169.231.79.75|52671     |31.13.70.23    |443            |gateway.facebook.com               |
|169.231.79.75-157.240.11.18-52672-443-6  |169.231.79.75|52672     |157.240.11.18  |443            |video-lax3-2.xx.fbcdn.net          |
|169.231.79.75-142.250.72.142-52673-443-6 |169.231.79.75|52673     |142.250.72.142 |443            |play.google.com                    |
|169.231.79.75-20.190.151.69-52674-443-6 

In [15]:
df_1 = spark.read.option("header",True) \
     .csv("TLS_03-02-2022_30mintrace-TCP-only.pcap_Flow.csv")
df_1.printSchema()

root
 |-- Flow ID: string (nullable = true)
 |-- Src IP: string (nullable = true)
 |-- Src Port: string (nullable = true)
 |-- Dst IP: string (nullable = true)
 |-- Dst Port: string (nullable = true)
 |-- Protocol: string (nullable = true)
 |-- Timestamp: string (nullable = true)
 |-- Flow Duration: string (nullable = true)
 |-- Tot Fwd Pkts: string (nullable = true)
 |-- Tot Bwd Pkts: string (nullable = true)
 |-- TotLen Fwd Pkts: string (nullable = true)
 |-- TotLen Bwd Pkts: string (nullable = true)
 |-- Fwd Pkt Len Max: string (nullable = true)
 |-- Fwd Pkt Len Min: string (nullable = true)
 |-- Fwd Pkt Len Mean: string (nullable = true)
 |-- Fwd Pkt Len Std: string (nullable = true)
 |-- Bwd Pkt Len Max: string (nullable = true)
 |-- Bwd Pkt Len Min: string (nullable = true)
 |-- Bwd Pkt Len Mean: string (nullable = true)
 |-- Bwd Pkt Len Std: string (nullable = true)
 |-- Flow Byts/s: string (nullable = true)
 |-- Flow Pkts/s: string (nullable = true)
 |-- Flow IAT Mean: string (

In [16]:
df = spark.read.csv("TLS-timeseries/TLS1.csv")
df = df.withColumn('SourceIP', split(df['_c0'], '\t').getItem(0)) \
        .withColumn('SourcePort', split(df['_c0'], '\t').getItem(1)) \
        .withColumn('DestinationIP', split(df['_c0'], '\t').getItem(2)) \
        .withColumn('DestinationPort', split(df['_c0'], '\t').getItem(3)) \
        .withColumn('Protocol', split(df['_c0'], '\t').getItem(4)) \
        .withColumn('ArrivalTime', split(df['_c1'], '\t').getItem(0)) \
        .withColumn('PacketLength', split(df['_c1'], '\t').getItem(1))
df.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- SourceIP: string (nullable = true)
 |-- SourcePort: string (nullable = true)
 |-- DestinationIP: string (nullable = true)
 |-- DestinationPort: string (nullable = true)
 |-- Protocol: string (nullable = true)
 |-- ArrivalTime: string (nullable = true)
 |-- PacketLength: string (nullable = true)



In [17]:
def direction(source):
    if source == "169.231.79.75":
        return 0
    else:
        return 1

In [18]:
from pyspark.sql.functions import split
l_t =[]
flow_time = []
for i in range (1, 155):
    strx = "TLS-timeseries/TLS"+str(i)+".csv"
    df_test = spark.read.csv(strx)
    if df_test.count() < 3:
        continue
    df_test = df_test.withColumn('SourceIP', split(df_test['_c0'], '\t').getItem(0)) \
        .withColumn('SourcePort', split(df_test['_c0'], '\t').getItem(1)) \
        .withColumn('DestinationIP', split(df_test['_c0'], '\t').getItem(2)) \
        .withColumn('DestinationPort', split(df_test['_c0'], '\t').getItem(3)) \
        .withColumn('Protocol', split(df_test['_c0'], '\t').getItem(4)) \
        .withColumn('ArrivalTime', split(df_test['_c1'], '\t').getItem(0)) \
        .withColumn('PacketLength', split(df_test['_c1'], '\t').getItem(1))
    df_test= df_test.drop("_c0")
    df_test = df_test.drop("_c1")
    df_test=df_test.select(concat_ws('-',df_test.SourceIP,df_test.DestinationIP,df_test.SourcePort,df_test.DestinationPort,df_test.Protocol).alias("FlowID"), "SourceIP", "DestinationIP","ArrivalTime", "PacketLength")
    df_test=df_test.withColumn("Direction", lit(None))
    my_udf = f.udf(direction, StringType())
    df_test= df_test.withColumn('Direction', my_udf(f.col("SourceIP")))
    df_test = df_test.drop("SourceIP")
    df_test = df_test.drop("DestinationIP")
    flow_time.append(df_test.collect()[0]['FlowID'])
    rdd5 = df_test.rdd
    b = rdd5.map(list)
    l_t.append(b)
print(len(l_t), len(flow_time))

    

    

154 154


In [33]:
time=[]
for i in l_t:
    l=[]
    for j in i.collect():
        l.append(j)
    time.append(l[:1024])
print(len(time))        

154


In [20]:
#!ls /home/navya/home/navya/anaconda3/TLS/TLS_03-02-2022_30mintrace1_splitted

In [21]:
import re
def label(row):
    row = str(row)
    if re.search(r'(video|netflix|prime|nflx|youtube|movies|music|song|songs|mp3|fm|stream|audio|beats|tracks|live|zoom)', row):
        result = 'streaming'
    elif re.search(r'(chat|web|messenger|discord)', row):
        result = 'chats'
    elif re.search(r'(game|games|gamer|casino|juegos)', row):
        result = 'games'
    elif re.search(r'(facebook|instagram|twitter|whatsapp|habbo|linkedin|pinterest|tiktok|yelp)', row):
        result = 'social'
    elif re.search(r'(download|downloads|file|files)', row):
        result = 'downloads'
    elif re.search(r'(mail|email|inbox|login|outlook|gmail|hotmail|smtp|webmail)', row):
        result = 'webmail'
    else:
        result = 'other'
    return result



In [22]:
import scapy
import numpy

In [23]:
from scapy.all import *
import numpy as np
lst3=[]
a=6
for i in range (1, 155):
    if i <10:
        strx = "TLS/TLS_03-02-2022_30mintrace1_splitted/TLS_03-02-2022_30mintrace1-000"+str(i)+".pcap"
    elif i<100:
        strx = "TLS/TLS_03-02-2022_30mintrace1_splitted/TLS_03-02-2022_30mintrace1-00"+str(i)+".pcap"
    else:
        strx = "TLS/TLS_03-02-2022_30mintrace1_splitted/TLS_03-02-2022_30mintrace1-0"+str(i)+".pcap"
    pcap = rdpcap(strx)
    pl = PacketList([p for p in pcap])
    if len(pl)<3:
        continue
    lst1=[] #p[IP].fields['dst'], p[IP].fields['src'], p[TCP].fields['dport'], p[TCP].fields['sport']
    lst1.append(pl[0][IP].fields['src'])
    lst1.append(pl[0][IP].fields['dst'])
    lst1.append(str(pl[0][TCP].fields['sport']))
    lst1.append(str(pl[0][TCP].fields['dport']))
    lst1.append(str(a))
    lst2=[]
    xp=(bytes(pl[0]))
    lst2.append(np.array([int(x) for x in xp]))
    yp=(bytes(pl[1]))
    lst2.append(np.array([int(x) for x in yp]))
    zp=(bytes(pl[2]))
    lst2.append(np.array([int(x) for x in zp]))
    arr = np.concatenate((lst2[0], lst2[1], lst2[2]), axis = 0)
    lst1.append(arr)
    lst3.append(lst1)
print(lst3)
        


[['169.231.79.75', '31.13.70.1', '52633', '443', '6', array([  0,   1,   0,   1,  48,   2, 208, 126,  53, 103, 143, 154,   8,
         0,  69,   0,   0,  72,  19, 238,  64,   0,  64,   6, 200, 129,
       169, 231,  79,  75,  31,  13,  70,   1, 205, 153,   1, 187, 247,
       100,  94,  86, 206, 214, 175,  46,  80,  24,   0, 254, 160,  57,
         0,   0,  23,   3,   3,   0,  27,  56,  28, 186,  33,  40, 104,
        73,   6, 251, 211, 219, 198, 119, 103, 177, 113, 255,  98,  14,
        91,  50,   6, 118,  27,  49, 215, 208, 208, 126,  53, 103, 143,
       154,   0,   1,   0,   1,  48,   2,   8,   0,  69,   0,   0,  68,
        47, 126,  64,   0,  89,   6, 147, 245,  31,  13,  70,   1, 169,
       231,  79,  75,   1, 187, 205, 153, 206, 214, 175,  46, 247, 100,
        94, 118,  80,  24,   1,  18, 196, 152,   0,   0,  23,   3,   3,
         0,  23, 143, 172, 147,  39,  91,  24,  14, 231,  49, 238, 142,
        84,  60,  99, 159,  51, 100, 247, 230,  49, 156, 224, 124,   0,
         1

In [24]:
list1= []
lst1=[]
list2=[]
for i in lst3:
    list2.append(i[5])
    i.pop(5)
print(len(list2))

151


In [25]:
print(list2[0])

[  0   1   0   1  48   2 208 126  53 103 143 154   8   0  69   0   0  72
  19 238  64   0  64   6 200 129 169 231  79  75  31  13  70   1 205 153
   1 187 247 100  94  86 206 214 175  46  80  24   0 254 160  57   0   0
  23   3   3   0  27  56  28 186  33  40 104  73   6 251 211 219 198 119
 103 177 113 255  98  14  91  50   6 118  27  49 215 208 208 126  53 103
 143 154   0   1   0   1  48   2   8   0  69   0   0  68  47 126  64   0
  89   6 147 245  31  13  70   1 169 231  79  75   1 187 205 153 206 214
 175  46 247 100  94 118  80  24   1  18 196 152   0   0  23   3   3   0
  23 143 172 147  39  91  24  14 231  49 238 142  84  60  99 159  51 100
 247 230  49 156 224 124   0   1   0   1  48   2 208 126  53 103 143 154
   8   0  69   0   0  72  19 245  64   0  64   6 200 122 169 231  79  75
  31  13  70   1 205 153   1 187 247 100  94 118 206 214 175  74  80  24
   0 254  80 214   0   0  23   3   3   0  27  77 141 255 181 197  81  29
 130  10  65  39 128 173  46 203  52 232  94 203 20

In [26]:
flow=[]
for i in lst3:
    str1 = "-".join(i)
    flow.append(str1)
print(len(flow))

151


In [27]:
df_1=df_1.withColumn("SNI", lit(None))
df_1.show(truncate=False)


+----------------------------------------+-------------+--------+--------------+--------+--------+----------------------+-------------+------------+------------+---------------+---------------+---------------+---------------+------------------+------------------+---------------+---------------+------------------+------------------+------------------+-------------------+------------------+------------------+------------+------------+------------+--------------------+--------------------+-----------+-----------+------------+--------------------+--------------------+-----------+-----------+-------------+-------------+-------------+-------------+--------------+--------------+-------------------+-------------------+-----------+-----------+------------------+------------------+------------------+------------+------------+------------+------------+------------+------------+--------------+------------+-------------+------------------+------------------+------------------+--------------+-------

In [28]:
c = df1.count()
for i in range(0, c):
    str1 = df1.collect()[i]['ServerNameIndication']
    str2 = df1.collect()[i]['FlowID']
    df_1 = df_1.withColumn("SNI",when(col("Flow ID") == str2,str1).otherwise(col("SNI")))
df_1.show()

+--------------------+-------------+--------+--------------+--------+--------+--------------------+-------------+------------+------------+---------------+---------------+---------------+---------------+------------------+------------------+---------------+---------------+------------------+------------------+------------------+-------------------+------------------+------------------+------------+------------+------------+--------------------+--------------------+-----------+-----------+------------+--------------------+--------------------+-----------+-----------+-------------+-------------+-------------+-------------+--------------+--------------+-------------------+-------------------+-----------+-----------+------------------+------------------+------------------+------------+------------+------------+------------+------------+------------+--------------+------------+-------------+------------------+------------------+------------------+--------------+--------------+--------------

In [29]:
import pyspark.sql.functions as f
my_udf = f.udf(label, StringType())
df_1 = df_1.withColumn('Label', my_udf(f.col("SNI")))
df_1.show()

+--------------------+-------------+--------+--------------+--------+--------+--------------------+-------------+------------+------------+---------------+---------------+---------------+---------------+------------------+------------------+---------------+---------------+------------------+------------------+------------------+-------------------+------------------+------------------+------------+------------+------------+--------------------+--------------------+-----------+-----------+------------+--------------------+--------------------+-----------+-----------+-------------+-------------+-------------+-------------+--------------+--------------+-------------------+-------------------+-----------+-----------+------------------+------------------+------------------+------------+------------+------------+------------+------------+------------+--------------+------------+-------------+------------------+------------------+------------------+--------------+--------------+--------------

In [30]:
df_1=df_1.dropDuplicates((['Flow ID']))
df_1.count()

153

In [31]:
raw_flow =[]
raw_byte =[]
index = []
for i in range(0, df_1.count()):
    str1 = df_1.collect()[i]['Flow ID']
    flag =0
    for j in range(0, 151):
         if str1 == flow[j]:
                raw_flow.append(flow[j])
                raw_byte.append(list2[j])
                index.append(j)
                flag =1
    if flag ==0:
        raw_flow.append(str1)
        x = np.random.randint(0,255,(1800,))
        raw_byte.append(x)
            
print(df_1.collect()[0]['Flow ID'])
print(raw_flow[0], raw_byte[0])
print(len(raw_flow), len(raw_byte), len(index))


140.82.112.21-169.231.79.75-443-52694-6
140.82.112.21-169.231.79.75-443-52694-6 [204 145 101 ... 254 224  58]
153 153 115


In [34]:
print(flow_time[1] + "    " + time[0][1][0])

169.231.79.75-198.189.255.82-58426-443-6    198.189.255.82-169.231.79.75-443-58426-6


In [35]:
time_array =[]
count = 0
count1 =0
for i in range(0, df_1.count()):
    str1 = df_1.collect()[i]['Flow ID']
    flag =0
    for j in range(0, 154):
        if str1 == flow_time[j]:
            time_array.append(time[j])
            flag = 1
print(len(time_array))

154


In [36]:
time_array = time_array[:153]

In [37]:
print(time_array[0][0], time_array[1][0])

['169.231.79.75-198.189.255.82-58426-443-6', ' 2022 12:58:45.369756000 PST', '571', '0'] ['169.231.79.75-198.189.255.82-58426-443-6', ' 2022 12:58:45.369756000 PST', '571', '0']


In [38]:
print(raw_flow[0], raw_flow[1])

140.82.112.21-169.231.79.75-443-52694-6 140.82.114.26-169.231.79.75-443-52693-6


In [39]:
train_label =[]
test_label=[]
train_list=[]
test_list=[]
train_timeseries =[]
test_timeseries =[]
for i in range(0, int(0.8*151)):
        strn = df_1.collect()[i]['Label']
        train_list.append(raw_byte[i])
        train_timeseries.append(time_array[i])
        if strn == 'chats':
                v = 1
        elif strn == 'streaming':
                v=2
        elif strn == 'downloads':
                v=3
        elif strn == 'games':
                v=4
        elif strn == 'webmail':
                v=5
        elif strn == 'social':
                v=6
        elif strn == 'other':
                v=7
        train_label.append(v)

for i in range(int(0.8*151)+1, 151):
        strn = df_1.collect()[i]['Label']
        test_list.append(raw_byte[i])
        test_timeseries.append(time_array[i])
        if strn == 'chats':
                v = 1
        elif strn == 'streaming':
                v=2
        elif strn == 'downloads':
                v=3
        elif strn == 'games':
                v=4
        elif strn == 'webmail':
                v=5
        elif strn == 'social':
                v=6
        elif strn == 'other':
                v=7
        test_label.append(v)


In [40]:
print(len(train_label), len(test_label), len(train_list), len(test_list), len(train_timeseries), len(test_timeseries))

120 30 120 30 120 30


In [41]:
for i in train_timeseries:
    for j in i:
        j.pop(0)
for i in test_timeseries:
    for j in i:
        j.pop(0)
print(test_timeseries[0][0])

[' 2022 12:58:45.369756000 PST', '571', '0']


In [42]:
df_1 = df_1.drop("Flow ID")
df_1 = df_1.drop("SNI")
df_1 = df_1.drop("Src IP")
df_1 = df_1.drop("Src Port")
df_1 = df_1.drop("Dst IP")
df_1 = df_1.drop("Dst Port")
df_1 = df_1.drop("Protocol")
df_1 = df_1.drop("Timestamp")
df_1.show(truncate=False)

+-------------+------------+------------+---------------+---------------+---------------+---------------+------------------+------------------+---------------+---------------+------------------+------------------+------------------+-------------------+------------------+--------------------+------------+------------+------------+------------------+--------------------+-----------+-----------+------------+-----------------+--------------------+-----------+-----------+-------------+-------------+-------------+-------------+--------------+--------------+-------------------+-------------------+-----------+-----------+------------------+------------------+------------------+------------+------------+------------+------------+------------+------------+--------------+------------+-------------+------------------+------------------+------------------+--------------+--------------+----------------+--------------+--------------+----------------+----------------+----------------+----------------+

In [43]:
import pandas as pd
df19 = df_1.toPandas()

In [44]:
n = 3
df19 = df19.iloc[:-n] 
print(df19.shape[0])

150


In [45]:
import numpy as np
from sklearn.model_selection import train_test_split
y = df19.pop('Label')
X = df19

In [46]:
X_train,X_test,y_train,y_test = train_test_split(X.index, y,test_size=0.2)
X_train = X.iloc[X_train]
X_test = X.iloc[X_test]

In [47]:
print(X_train.shape[0], X_test.shape[0])

120 30


In [48]:
print(X_test)

    Flow Duration Tot Fwd Pkts Tot Bwd Pkts TotLen Fwd Pkts TotLen Bwd Pkts  \
0          159898            5            9          2039.0          4822.0   
11          18063            1            3            64.0          1432.0   
64       76446399           17          388         12476.0        506026.0   
95       46016945            6            8           528.0           229.0   
35       15110175            2            9          1269.0          7686.0   
107      65139300            9           12          5673.0          2006.0   
133      60054308            2          757           788.0       1038656.0   
139      60147582            2           17           728.0         16607.0   
22          71322            1            6            64.0          5806.0   
48       30098516            7           21           938.0         12836.0   
141      64359498            5          235          2679.0        311793.0   
74          12722            1            5         

In [49]:
train_list1 =[]
test_list1 =[]
for i in train_list:
    if len(i)>1800:
        i = i[:1800]
    elif len(i)<1800:
        i = np.pad(i, (0, 1800-len(i)), 'constant')
    train_list1.append(i)
print(len(train_list1))

120


In [50]:
for i in test_list:
    if len(i)>1800:
        i = i[:1800]
    elif len(i)<1800:
        i = np.pad(i, (0, 1800-len(i)), 'constant')
    test_list1.append(i)
print(len(test_list1))

30


In [51]:
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense, Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.pooling import MaxPooling1D
from keras.layers.recurrent import LSTM
from keras.layers.merge import concatenate

Using TensorFlow backend.


In [52]:
visible = a_layer = Input(shape=(76, ) , name="a")
hidden1 = Dense(200, activation="relu")(visible)
output1 = Dense(200, activation="relu")(hidden1)

In [53]:
visible_l = b_layer = Input(shape=(1024, 3) , name="b")
hidden1_l = Dense(512, activation="relu", name="m1")(visible_l)
hidden2_l = LSTM(256, return_sequences=True)(hidden1_l)
hidden3_l = LSTM(256, return_sequences=True)(hidden2_l)
hidden4_l = LSTM(256)(hidden3_l)

In [54]:
visible = c_layer = Input(shape=(1800, 1), name="c")
conv1 = Conv1D(256, kernel_size=2, activation='relu')(visible)
conv2 = Conv1D(256, kernel_size=2, activation='relu')(conv1)
pool1 = MaxPooling1D(pool_size=(2,))(conv2)
conv3 = Conv1D(128, kernel_size=2, activation='relu')(pool1)
conv4 = Conv1D(128, kernel_size=2, activation='relu')(conv3)
pool2 = MaxPooling1D(pool_size=(2,))(conv4)
output2 = Flatten()(pool2)

In [55]:
merge = concatenate([output1, hidden4_l, output2])
hiddeno_1=Dense(128, activation='relu')(merge)
hiddeno_2=Dense(128, activation='relu')(hiddeno_1)
output_final = Dense(8, activation='softmax')(hiddeno_2)
model = Model(inputs=[a_layer, b_layer, c_layer], outputs=output_final)

In [56]:
dumb_c = np.empty(shape=(120, 1800, 1))
dumb_b = np.empty(shape=(120, 1024, 3))
dumb_a = np.empty(shape=(120, 77))
dumb_y = np.empty(shape=(120, 7))

In [57]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [58]:
train_list1 = numpy.array(train_list1)
print(train_list1.shape)
test_list1 = numpy.array(test_list1)
print(test_list1.shape)

(120, 1800)
(30, 1800)


In [59]:
#model.fit(x={'a': dumb_a, 'b': dumb_b, 'c': dumb_c}, y=dumb_y, epochs=1, verbose=True)

In [60]:
from keras.utils import to_categorical
y_binary = to_categorical(train_label)

In [61]:
y_binaryt = to_categorical(test_label)

In [62]:
from numpy import zeros, newaxis
train_list1 = train_list1[:, :, newaxis]
print(train_list1.shape)
test_list1 = test_list1[:, :, newaxis]
print(test_list1.shape)

(120, 1800, 1)
(30, 1800, 1)


In [63]:
def splt(str1):
    str1 = str1.split(" ")
    p = str1[2].split(":")
    const = float(p[2])
    return const


In [81]:
train_timeseries1 =[]
for i in train_timeseries:
    temp=[]
    for j in i:
        k=0
        temp1=[]
        while(k<3):
            j[1] = int(j[1])
            temp1.append(j[1])
            k=k+1
            j[2] = int(j[2])
            temp1.append(j[2])
            k=k+1
            const = splt(j[0])
            temp1.append(const)
            k=k+1
        temp.append(temp1)
    train_timeseries1.append(temp)
print(len(train_timeseries1))

120


In [82]:
test_timeseries1 =[]
for i in test_timeseries:
    temp=[]
    for j in i:
        k=0
        temp1=[]
        while(k<3):
            j[1] = int(j[1])
            temp1.append(j[1])
            k=k+1
            j[2] = int(j[2])
            temp1.append(j[2])
            k=k+1
            const = splt(j[0])
            temp1.append(const)
            k=k+1
        temp.append(temp1)
    test_timeseries1.append(temp)
print(len(test_timeseries1))

30


In [147]:
import pickle
with open('timeseries1.pickle', 'wb') as f:
    pickle.dump(train_timeseries1, f)
with open('timeseries2.pickle', 'wb') as f:
    pickle.dump(test_timeseries1, f)


In [59]:
def scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

In [60]:
callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [62]:
model.fit(x={"a": X_train, "b": train_timeseries1, "c": train_list1}, y=y_binary, epochs=40, callbacks=[callback], verbose=1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.callbacks.History at 0x7f35a1b365e0>

In [64]:
import pickle
with open('flow_bytes.pickle', 'wb') as f:
    pickle.dump(train_list1, f)
with open('flow_timeseries.pickle', 'wb') as f:
    pickle.dump(train_timeseries1, f)
with open('flow_statistics.pickle', 'wb') as f:
    pickle.dump(X_train, f)

In [100]:
model.evaluate(x={"a": X_test, "b": test_timeseries1, "c": test_list1}, y=y_binaryt)



[2.704594850540161, 0.30000001192092896]

In [74]:
type(test_list1)

numpy.ndarray

In [None]:
def splt(str1):
    str1 = str1.split(" ")
    p = str1[2].split(":")
    const = float(p[2])
    return const


In [None]:
print(strx1[2])

In [None]:
p=strx1[2].split(":")

In [None]:
print(float(p[2]))

In [None]:
train_timeseries[0][0]