In [1]:
import pandas as pd

In [2]:
# CSV files to read; the loading cell below reads them in this order.
file_paths = ['blackhole.csv', 'dodag.csv', 'flooding.csv', 'rank.csv']

In [None]:
# Read every path in file_paths into its own DataFrame, keeping list order.
dfs = list(map(pd.read_csv, file_paths))

In [4]:
# Stack the per-file frames row-wise into one frame with a fresh 0..n-1 index.
data = pd.concat(dfs, axis=0, ignore_index=True)

In [5]:
# Show a heading followed by the first five rows of the combined frame.
print("Dataset Preview:", data.head(), sep="\n")

Dataset Preview:
    time  source  destination  length  info  transmission_rate_per_1000_ms  \
0  0.037      39         9999     0.0   1.0                       0.000000   
1  0.037      39         9999     0.0   1.0                       0.000000   
2  0.038      39         9999     0.0   1.0                       0.671176   
3  0.045      39         9999     0.0   1.0                       0.000000   
4  0.046      39         9999     0.0   1.0                       0.000000   

   reception_rate_per_1000_ms  transmission_average_per_sec  \
0                    0.671176                      0.000000   
1                    0.649873                      0.000000   
2                    0.652361                      0.462516   
3                    0.633786                      0.000000   
4                    0.630378                      0.000000   

   reception_average_per_sec  transmission_count_per_sec  \
0                   0.499879                    0.000000   
1              

In [6]:
# Basic dataset information: index range, column dtypes, non-null counts.
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called bare; wrapping it in print() would append a stray "None".
data.info()


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1639975 entries, 0 to 1639974
Data columns (total 18 columns):
 #   Column                               Non-Null Count    Dtype  
---  ------                               --------------    -----  
 0   time                                 1639975 non-null  float64
 1   source                               1639975 non-null  int64  
 2   destination                          1639975 non-null  int64  
 3   length                               1639975 non-null  float64
 4   info                                 1639975 non-null  float64
 5   transmission_rate_per_1000_ms        1639975 non-null  float64
 6   reception_rate_per_1000_ms           1639975 non-null  float64
 7   transmission_average_per_sec         1639975 non-null  float64
 8   reception_average_per_sec            1639975 non-null  float64
 9   transmission_count_per_sec           1639975 non-null  float64
 10  reception_count_per_sec              1639975 non-nu

In [7]:
# Count null entries per column and print them under a heading.
missing_per_column = data.isnull().sum()
print("\nMissing Values (Before Handling):", missing_per_column, sep="\n")


Missing Values (Before Handling):
time                                   0
source                                 0
destination                            0
length                                 0
info                                   0
transmission_rate_per_1000_ms          0
reception_rate_per_1000_ms             0
transmission_average_per_sec           0
reception_average_per_sec              0
transmission_count_per_sec             0
reception_count_per_sec                0
transmission_total_duration_per_sec    0
reception_total_duration_per_sec       0
dao                                    0
dis                                    0
dio                                    0
category                               0
label                                  0
dtype: int64


In [10]:
# Number of distinct values in each column, displayed in ascending order.
unique_counts = data.nunique()
unique_counts.sort_values()

label                                       2
category                                    5
info                                       10
length                                     13
destination                               101
source                                    101
dis                                       206
dao                                       233
dio                                       383
transmission_count_per_sec                467
transmission_rate_per_1000_ms             470
reception_total_duration_per_sec          575
reception_rate_per_1000_ms               1248
reception_count_per_sec                  1253
transmission_total_duration_per_sec      1634
reception_average_per_sec                9369
transmission_average_per_sec            21764
time                                   725703
dtype: int64

In [39]:
# Distinct values of the 'info' column, in ascending order.
info_values = data['info'].sort_values().unique()
print(info_values)

[0.         0.41111834 0.52029776 0.55440486 0.57018941 0.57241282
 0.57289431 0.58018146 0.58259798 1.        ]


In [18]:
import tensorflow as tf
from tensorflow.keras import layers

Matplotlib is building the font cache; this may take a moment.


In [35]:
# Vocabulary lookups for the categorical-style columns. Each layer is created
# and immediately adapted on its column so the pairing is obvious.

# 'source' and 'destination' are integer columns -> IntegerLookup.
source_lookup = layers.IntegerLookup(output_mode='int')
source_lookup.adapt(data['source'])

destination_lookup = layers.IntegerLookup(output_mode='int')
destination_lookup.adapt(data['destination'])

# 'info' and 'length' are float columns; they are stringified first so each
# distinct value becomes one StringLookup token.
info_lookup = layers.StringLookup(output_mode='int')
info_lookup.adapt(data['info'].astype(str))

length_lookup = layers.StringLookup(output_mode='int')
length_lookup.adapt(data['length'].astype(str))


In [70]:
# One 32-dimensional embedding table per lookup, sized by that lookup's vocabulary.
source_embedding_layer = layers.Embedding(source_lookup.vocabulary_size(), 32)
destination_embedding_layer = layers.Embedding(destination_lookup.vocabulary_size(), 32)
info_embedding_layer = layers.Embedding(info_lookup.vocabulary_size(), 32)
length_embedding_layer = layers.Embedding(length_lookup.vocabulary_size(), 32)

# Symbolic path for 'source': raw id -> vocabulary index -> embedding vector.
source_input = tf.keras.Input(shape=(1,), dtype=tf.int32)
source_index = source_lookup(source_input)
source_embedding = source_embedding_layer(source_index)
print(source_embedding.shape)

(None, 1, 32)


In [49]:
# Convert a value to its index in the 'info' vocabulary.
# NOTE(review): the vocabulary was adapted on data['info'].astype(str); if the
# full-precision string of this float differs from the 8-digit literal below,
# the lookup returns the out-of-vocabulary index instead — confirm against
# info_lookup.get_vocabulary().
value = tf.constant("0.58018146")
print(type(value))
index = info_lookup(value)

# Fetch the embedding vector for that index. The layer is named
# info_embedding_layer; the previous name `info_embedding` is never defined
# in this notebook and raised NameError on a fresh kernel.
embedding_vector = info_embedding_layer(index)
print(embedding_vector)

<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(
[-0.04899985  0.04083799  0.01216459  0.02252749 -0.0076803  -0.01957971
  0.03763356 -0.01048531 -0.00600383  0.02336247  0.01699139  0.03945199
  0.01549754  0.0269189   0.01121736  0.02182175  0.01574903 -0.03418497
  0.04951834  0.00981645 -0.02337005 -0.02803885  0.04740372  0.04845286
 -0.00362219  0.00021451  0.0259108   0.00962509  0.00182294  0.03295778
  0.03648982 -0.01601095], shape=(32,), dtype=float32)


In [64]:
# Numerical -> Vector
def numeric_feature_token(name, embedding_dim=32):
    """Build the input and projected token for one scalar numeric feature.

    Args:
        name: Name given to the Keras Input for this feature.
        embedding_dim: Width of the Dense projection (default 32).

    Returns:
        A ``(feature_input, feature_vector)`` pair: the ``(batch, 1)`` float32
        Keras Input and its projection reshaped to ``(batch, 1, embedding_dim)``.
    """
    feature_input = tf.keras.Input(shape=(1,), dtype=tf.float32, name=name)
    projected = layers.Dense(embedding_dim)(feature_input)
    # Add a length-1 axis so the result is (batch, 1, embedding_dim).
    feature_vector = layers.Reshape((1, embedding_dim))(projected)
    return feature_input, feature_vector


# One (input, vector) pair per numeric column, in the same creation order as before.
dis_input, dis_vector = numeric_feature_token('dis')
dao_input, dao_vector = numeric_feature_token('dao')
dio_input, dio_vector = numeric_feature_token('dio')
transmission_count_per_sec_input, transmission_count_per_sec_vector = numeric_feature_token('transmission_count_per_sec')
transmission_rate_per_1000_ms_input, transmission_rate_per_1000_ms_vector = numeric_feature_token('transmission_rate_per_1000_ms')
time_input, time_vector = numeric_feature_token('time')
reception_total_duration_per_sec_input, reception_total_duration_per_sec_vector = numeric_feature_token('reception_total_duration_per_sec')
reception_rate_per_1000_ms_input, reception_rate_per_1000_ms_vector = numeric_feature_token('reception_rate_per_1000_ms')
reception_count_per_sec_input, reception_count_per_sec_vector = numeric_feature_token('reception_count_per_sec')
transmission_total_duration_per_sec_input, transmission_total_duration_per_sec_vector = numeric_feature_token('transmission_total_duration_per_sec')
reception_average_per_sec_input, reception_average_per_sec_vector = numeric_feature_token('reception_average_per_sec')
transmission_average_per_sec_input, transmission_average_per_sec_vector = numeric_feature_token('transmission_average_per_sec')


In [79]:
# Join the (batch, 1, 32) feature tensors along axis 1.
feature_tokens = [source_embedding, dis_vector, dao_vector]
concatenated = layers.Concatenate(axis=1)(feature_tokens)
print(type(concatenated))

<class 'keras.src.backend.common.keras_tensor.KerasTensor'>


In [81]:
from tensorflow.keras.layers import LayerNormalization

# Layer-normalise over the last axis of the concatenated tensor.
pre_attention_norm = layers.LayerNormalization(axis=-1)
norm_concatenated = pre_attention_norm(concatenated)  # (batch, num_features, embedding_dim)

In [82]:
# Self-attention: the normalised tensor serves as query, key and value.
self_attention = layers.MultiHeadAttention(
    num_heads=4,   # number of attention heads
    key_dim=32,    # length of each key/query vector — usually embedding_dim / num_heads
)
attention_output = self_attention(
    query=norm_concatenated,
    key=norm_concatenated,
    value=norm_concatenated,
)

In [83]:
# Residual connection around attention, followed by layer normalisation.
residual_add = layers.Add()
input_layernorm = residual_add([norm_concatenated, attention_output])
post_attention_norm = layers.LayerNormalization(epsilon=1e-6)
output_layernorm = post_attention_norm(input_layernorm)

In [84]:
# Feed-forward sub-block: widen to 128 units with ReLU, then project back to 32.
ffn_hidden = layers.Dense(128, activation='relu')(output_layernorm)
ffn = layers.Dense(32)(ffn_hidden)

# Residual + LayerNorm
ffn_residual = layers.Add()([output_layernorm, ffn])
x = layers.LayerNormalization(epsilon=1e-6)(ffn_residual)

In [85]:
# Average over axis 1, then map to a single sigmoid output.
pooling = layers.GlobalAveragePooling1D()
x = pooling(x)  # Shape: (batch, 32)
classifier_head = layers.Dense(1, activation='sigmoid')
output = classifier_head(x)

In [None]:
# Assemble the functional model. `inputs` must list every Input the output
# depends on: `concatenated` is built from source_embedding, dis_vector and
# dao_vector, so those three inputs are required. The previous `...`
# placeholder is a Python Ellipsis object, not a Keras input.
model = tf.keras.Model(
    inputs=[source_input, dis_input, dao_input],
    outputs=output,
)