In [197]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.models import Sequential , load_model
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

In [198]:
train=pd.read_csv('train_dataset.csv')
test=pd.read_csv('test_dataset.csv')
train.info()
train.nunique()
train['slice Type'].value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31583 entries, 0 to 31582
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   LTE/5g Category       31583 non-null  int64  
 1   Time                  31583 non-null  int64  
 2   Packet Loss Rate      31583 non-null  float64
 3   Packet delay          31583 non-null  int64  
 4   IoT                   31583 non-null  int64  
 5   LTE/5G                31583 non-null  int64  
 6   GBR                   31583 non-null  int64  
 7   Non-GBR               31583 non-null  int64  
 8   AR/VR/Gaming          31583 non-null  int64  
 9   Healthcare            31583 non-null  int64  
 10  Industry 4.0          31583 non-null  int64  
 11  IoT Devices           31583 non-null  int64  
 12  Public Safety         31583 non-null  int64  
 13  Smart City & Home     31583 non-null  int64  
 14  Smart Transportation  31583 non-null  int64  
 15  Smartphone         

1    16799
3     7392
2     7392
Name: slice Type, dtype: int64

In [199]:
# Perform data preprocessing and optimization
def reduce_mem_usage(train):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.
    """
    start_mem = train.memory_usage().sum() / 1024**2
    #print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in train.columns:
        col_type = train[col].dtype

        if col_type != object:
            c_min = train[col].min()
            c_max = train[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    train[col] = train[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    train[col] = train[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    train[col] = train[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    train[col] = train[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    train[col] = train[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    train[col] = train[col].astype(np.float32)
                else:
                    train[col] = train[col].astype(np.float64)
        else:
            train[col] = train[col].astype('object')

    end_mem = train.memory_usage().sum() / 1024**2
    #print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    #print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return train

In [200]:
# Reduce memory usage of the train dataset
train = reduce_mem_usage(train)
train.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31583 entries, 0 to 31582
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   LTE/5g Category       31583 non-null  int8   
 1   Time                  31583 non-null  int8   
 2   Packet Loss Rate      31583 non-null  float16
 3   Packet delay          31583 non-null  int16  
 4   IoT                   31583 non-null  int8   
 5   LTE/5G                31583 non-null  int8   
 6   GBR                   31583 non-null  int8   
 7   Non-GBR               31583 non-null  int8   
 8   AR/VR/Gaming          31583 non-null  int8   
 9   Healthcare            31583 non-null  int8   
 10  Industry 4.0          31583 non-null  int8   
 11  IoT Devices           31583 non-null  int8   
 12  Public Safety         31583 non-null  int8   
 13  Smart City & Home     31583 non-null  int8   
 14  Smart Transportation  31583 non-null  int8   
 15  Smartphone         

In [201]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31584 entries, 0 to 31583
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   LTE/5g Category       31584 non-null  int64  
 1   Time                  31584 non-null  int64  
 2   Packet Loss Rate      31584 non-null  float64
 3   Packet delay          31584 non-null  int64  
 4   IoT                   31584 non-null  int64  
 5   LTE/5G                31584 non-null  int64  
 6   GBR                   31584 non-null  int64  
 7   Non-GBR               31584 non-null  int64  
 8   AR/VR/Gaming          31584 non-null  int64  
 9   Healthcare            31584 non-null  int64  
 10  Industry 4.0          31584 non-null  int64  
 11  IoT Devices           31584 non-null  int64  
 12  Public Safety         31584 non-null  int64  
 13  Smart City & Home     31584 non-null  int64  
 14  Smart Transportation  31584 non-null  int64  
 15  Smartphone         

In [202]:
# Reduce memory usage of the test dataset
test = reduce_mem_usage(test)
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31584 entries, 0 to 31583
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   LTE/5g Category       31584 non-null  int8   
 1   Time                  31584 non-null  int8   
 2   Packet Loss Rate      31584 non-null  float16
 3   Packet delay          31584 non-null  int16  
 4   IoT                   31584 non-null  int8   
 5   LTE/5G                31584 non-null  int8   
 6   GBR                   31584 non-null  int8   
 7   Non-GBR               31584 non-null  int8   
 8   AR/VR/Gaming          31584 non-null  int8   
 9   Healthcare            31584 non-null  int8   
 10  Industry 4.0          31584 non-null  int8   
 11  IoT Devices           31584 non-null  int8   
 12  Public Safety         31584 non-null  int8   
 13  Smart City & Home     31584 non-null  int8   
 14  Smart Transportation  31584 non-null  int8   
 15  Smartphone         

In [203]:
# Rename columns to remove special characters
train = train.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
test = test.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))

In [204]:
train.head() #just listing all columns with first 5 rows.

Unnamed: 0,LTE5gCategory,Time,PacketLossRate,Packetdelay,IoT,LTE5G,GBR,NonGBR,ARVRGaming,Healthcare,Industry40,IoTDevices,PublicSafety,SmartCityHome,SmartTransportation,Smartphone,sliceType
0,14,0,1e-06,10,1,0,0,1,0,0,0,0,1,0,0,0,3
1,18,20,0.001,100,0,1,1,0,1,0,0,0,0,0,0,0,1
2,17,14,1e-06,300,0,1,0,1,0,0,0,0,0,0,0,1,1
3,3,17,0.010002,100,0,1,0,1,0,0,0,0,0,0,0,1,1
4,9,4,0.010002,50,1,0,0,1,0,0,0,0,0,1,0,0,2


In [205]:
test.head() #just listing all columns with first 5 rows.


Unnamed: 0,LTE5gCategory,Time,PacketLossRate,Packetdelay,IoT,LTE5G,GBR,NonGBR,ARVRGaming,Healthcare,Industry40,IoTDevices,PublicSafety,SmartCityHome,SmartTransportation,Smartphone
0,15,17,0.001,100,0,1,1,0,1,0,0,0,0,0,0,0
1,14,18,1e-06,10,1,0,0,1,0,0,0,0,0,0,1,0
2,11,7,0.001,50,1,0,1,0,0,0,1,0,0,0,0,0
3,20,14,0.001,50,1,0,1,0,0,0,1,0,0,0,0,0
4,2,22,0.001,50,0,1,0,1,1,0,0,0,0,0,0,0


In [206]:
y = train.pop('sliceType')
X = train

# Convert non-numeric columns to numeric values
X = X.apply(pd.to_numeric, errors='coerce')

# Split the updated training data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Convert labels to string type
y_train = y_train.astype(str)
y_val = y_val.astype(str)

In [207]:
X_train.shape[1]

16

In [208]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)


In [209]:
num_classes = len(label_encoder.classes_)
num_classes

3

In [210]:
y_train_categorical = to_categorical(y_train_encoded, num_classes)
y_val_categorical = to_categorical(y_val_encoded, num_classes)

In [222]:
# Define the hybrid model without Embedding layer

model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', padding='same', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    Conv1D(256, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    LSTM(128, return_sequences=True),
    LSTM(128),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

'''
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', padding='same', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    Conv1D(256, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
'''

"\nmodel = Sequential([\n    Conv1D(64, kernel_size=3, activation='relu', padding='same', input_shape=(X_train.shape[1], 1)),\n    MaxPooling1D(pool_size=2),\n    Conv1D(128, kernel_size=3, activation='relu', padding='same'),\n    MaxPooling1D(pool_size=2),\n    Conv1D(256, kernel_size=3, activation='relu', padding='same'),\n    MaxPooling1D(pool_size=2),\n    Flatten(), \n    Dense(256, activation='relu'),\n    Dropout(0.5),\n    Dense(num_classes, activation='softmax')\n])\n"

In [226]:
# Adjust the learning rate
lr = 0.0001
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Compile the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


In [225]:
# Define a callback to save the best model
checkpoint = ModelCheckpoint('model1.h5', monitor='val_accuracy', save_best_only=True, verbose=1)

In [223]:
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_39 (Conv1D)          (None, 16, 64)            256       
                                                                 
 max_pooling1d_39 (MaxPoolin  (None, 8, 64)            0         
 g1D)                                                            
                                                                 
 conv1d_40 (Conv1D)          (None, 8, 128)            24704     
                                                                 
 max_pooling1d_40 (MaxPoolin  (None, 4, 128)           0         
 g1D)                                                            
                                                                 
 conv1d_41 (Conv1D)          (None, 4, 256)            98560     
                                                                 
 max_pooling1d_41 (MaxPoolin  (None, 2, 256)         

In [227]:
history = model.fit(X_train,
                    y_train_categorical,
                    batch_size=32,
                    epochs=5,
                    validation_data=(X_val, y_val_categorical),
                    callbacks=[checkpoint])

Epoch 1/5
Epoch 1: val_accuracy improved from -inf to 1.00000, saving model to model1.h5
Epoch 2/5
Epoch 2: val_accuracy did not improve from 1.00000
Epoch 3/5
Epoch 3: val_accuracy did not improve from 1.00000
Epoch 4/5
Epoch 4: val_accuracy did not improve from 1.00000
Epoch 5/5
Epoch 5: val_accuracy did not improve from 1.00000


In [None]:
# Load the saved model
loaded_model = load_model('model1.h5')


In [None]:
loaded_model.summary()

In [None]:
test = test.apply(pd.to_numeric, errors='coerce')
test

In [None]:
# Make predictions on the test data
predictions = loaded_model.predict(test)

In [None]:
# Assuming you want to get the predicted classes
predicted_classes = predictions.argmax(axis=1)

In [None]:
predicted_classes+1

In [None]:
predicted_classes=predicted_classes+1

In [None]:
prediction=pd.read_csv('predictions.csv')

In [None]:
GroundTruth=prediction['slice Type']
GroundTruth

In [None]:
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(GroundTruth, predicted_classes)
print(f"Accuracy: {accuracy:.2f}")


In [None]:
# Generate and print the classification report
class_report = classification_report(GroundTruth, predicted_classes)
print(class_report)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Calculate confusion matrix
cm = confusion_matrix(GroundTruth, predicted_classes)

# Create a heatmap for the confusion matrix
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='RdPu', xticklabels=np.unique(GroundTruth), yticklabels=np.unique(GroundTruth))
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()