## Network Slicing Prediction

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib

Using matplotlib backend: QtAgg


In [6]:
train = pd.read_csv("train_dataset.csv")
test = pd.read_csv("test_dataset.csv")

In [7]:
train.shape, test.shape

((31583, 17), (31584, 16))

In [8]:
train.columns

Index(['LTE/5g Category', 'Time', 'Packet Loss Rate', 'Packet delay', 'IoT',
       'LTE/5G', 'GBR', 'Non-GBR', 'AR/VR/Gaming', 'Healthcare',
       'Industry 4.0', 'IoT Devices', 'Public Safety', 'Smart City & Home',
       'Smart Transportation', 'Smartphone', 'slice Type'],
      dtype='object')

In [10]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31583 entries, 0 to 31582
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   LTE/5g Category       31583 non-null  int64  
 1   Time                  31583 non-null  int64  
 2   Packet Loss Rate      31583 non-null  float64
 3   Packet delay          31583 non-null  int64  
 4   IoT                   31583 non-null  int64  
 5   LTE/5G                31583 non-null  int64  
 6   GBR                   31583 non-null  int64  
 7   Non-GBR               31583 non-null  int64  
 8   AR/VR/Gaming          31583 non-null  int64  
 9   Healthcare            31583 non-null  int64  
 10  Industry 4.0          31583 non-null  int64  
 11  IoT Devices           31583 non-null  int64  
 12  Public Safety         31583 non-null  int64  
 13  Smart City & Home     31583 non-null  int64  
 14  Smart Transportation  31583 non-null  int64  
 15  Smartphone         

In [16]:
print(f"Training dataset Missing Value status \n\n{train.isna().sum()}\n")
print(f"Testing dataset Missing Value status \n\n{test.isna().sum()}")

Training dataset Missing Value status 

LTE/5g Category         0
Time                    0
Packet Loss Rate        0
Packet delay            0
IoT                     0
LTE/5G                  0
GBR                     0
Non-GBR                 0
AR/VR/Gaming            0
Healthcare              0
Industry 4.0            0
IoT Devices             0
Public Safety           0
Smart City & Home       0
Smart Transportation    0
Smartphone              0
slice Type              0
dtype: int64

Testing dataset Missing Value status 

LTE/5g Category         0
Time                    0
Packet Loss Rate        0
Packet delay            0
IoT                     0
LTE/5G                  0
GBR                     0
Non-GBR                 0
AR/VR/Gaming            0
Healthcare              0
Industry 4.0            0
IoT Devices             0
Public Safety           0
Smart City & Home       0
Smart Transportation    0
Smartphone              0
dtype: int64


#### LTE/5G Category Probability Distribution with slice Type

In [55]:
fig = plt.figure(figsize = (20, 10))

# For training dataset
plt.subplot(2,1,1)
plt.title("Train Dataset 'LTE/5g Category' Probabilistic Distribution with 'slice Type'", 
          fontsize=10)
train_lte_hist = sns.histplot(data = train, 
                              x = "LTE/5g Category", 
                              stat = "probability",
                            hue = "slice Type")
plt.xticks(train["LTE/5g Category"].value_counts().index, fontsize = 10)
plt.xlabel("", fontsize = 10)
plt.ylabel("Probability", fontsize = 10)
plt.yticks(np.arange(0.000, 0.030, 0.005), fontsize = 10)
plt.setp(train_lte_hist.get_legend().get_texts(), fontsize = '10')
plt.setp(train_lte_hist.get_legend().get_title(), fontsize = '10')

# for the test sets
plt.subplot(2,1,2)
plt.title("Test data 'LTE/5g Category' Probability Distribution",
         fontsize = 10)
sns.histplot(data = test, x = 'LTE/5g Category', stat = "probability")
plt.xticks(test['LTE/5g Category'].value_counts().index, fontsize = 10)
plt.xlabel("LTE/5g Category", fontsize = 10)
plt.ylabel("Probability", fontsize = 10)
plt.yticks(np.arange(0.000, 0.030, 0.01), fontsize = 10)

([<matplotlib.axis.YTick at 0x7ff211884dc0>,
  <matplotlib.axis.YTick at 0x7ff211884760>,
  <matplotlib.axis.YTick at 0x7ff2118f2a70>],
 [Text(0, 0.0, '0.00'), Text(0, 0.01, '0.01'), Text(0, 0.02, '0.02')])

#### Time Feature with slice Type

In [69]:
time_type_slice = pd.concat([train["Time"], train['slice Type']], axis = 1)
time = time_type_slice.value_counts().index.get_level_values(0)
slice_type = time_type_slice.value_counts().index.get_level_values(1)

In [71]:
plt.title("Train data 'Time' with 'slice Type'")
sns.scatterplot(x = time, y = slice_type, 
                hue = time_type_slice.value_counts().values)
plt.show()

In [74]:
print(train["Time"].describe())

count    31583.000000
mean        11.476459
std          6.915643
min          0.000000
25%          6.000000
50%         11.000000
75%         17.000000
max         23.000000
Name: Time, dtype: float64


In [81]:
plt.figure(figsize = (18, 12))
sns.scatterplot(data = train, x = "Time", y = "LTE/5g Category",
               hue = "slice Type", palette="deep")
plt.xlabel("Time")
plt.ylabel("LTE/5G Category")
plt.show()

#### Packet Loss rate probability

In [118]:
fig = plt.figure(figsize = (18,12))
# For the Train dataset
plt.subplot(2,1,1)
plt.title("train data 'Packet Loss Rate' probability distribution", fontsize = 10)
train_plr_hist = sns.histplot(data = train, x = "Packet Loss Rate",
                             stat = "probability")
plt.xticks(train["Packet Loss Rate"].value_counts().index, fontsize = 10)
plt.yticks(np.arange(0, 0.5, 0.1), fontsize = 10)
plt.xlabel("", fontsize = 10)
plt.ylabel('Probabiblity', fontsize = 10)


# For Test dataset
plt.subplot(2, 1, 2)
plt.title("Test Data 'Packet Loss Rate' probabilbity distribution", fontsize = 10)
test_plr_hist = sns.histplot(data = test, x = "Packet Loss Rate", 
                             stat = "probability",
                            color = "red")
plt.xticks(test["Packet Loss Rate"].value_counts().index, fontsize = 10)
plt.yticks(np.arange(0, 0.5, 0.1), fontsize = 10)
plt.xlabel("Packet Loss Rate", fontsize = 10)
plt.ylabel("Probability", fontsize = 10)

plt.show()

In [124]:
plt.figure(figsize = (18, 10))

# For the train data
plt.subplot(2,1,1)
plt.title("Train data 'Packet Loss Rate' line plot", fontsize = 15)
plt.plot(train["Packet Loss Rate"].value_counts(), marker = '*')
plt.xticks(train["Packet Loss Rate"].unique(), fontsize = 10)
plt.yticks(train["Packet Loss Rate"].value_counts(), fontsize = 10)
plt.xlabel("", fontsize = 10)
plt.ylabel("Count", fontsize = 10)

# For Test data
plt.subplot(2,1,2)
plt.title("Test dataset 'Packet Loss Rate' line plot", fontsize = 15)
plt.plot(test["Packet Loss Rate"].value_counts(), marker = "D", color = "green")
plt.xticks(test["Packet Loss Rate"].unique(), fontsize = 10)
plt.yticks(test['Packet Loss Rate'].value_counts(), fontsize = 10)
plt.xlabel("Packet Loss Rate", fontsize = 10)
plt.ylabel("Count")

plt.show()

In [137]:
## Train data KDE plot
plt.figure(figsize = (18, 10))
plt.subplot(1,2,1)
plt.title("Train data 'Packet Delay' counts with 'slice Type'", fontsize = 10)
sns.histplot(data = train, x = 'Packet delay', hue = 'slice Type',
            palette='deep', kde = True)

# Test data KDE plot
plt.subplot(1,2,2)
plt.title("Test data 'Packet delay' probability distribution", fontsize = 10)
sns.histplot(data = test, x = "Packet delay",
            stat = 'probability', kde = True)

plt.show()

In [138]:
train.head()

Unnamed: 0,LTE/5g Category,Time,Packet Loss Rate,Packet delay,IoT,LTE/5G,GBR,Non-GBR,AR/VR/Gaming,Healthcare,Industry 4.0,IoT Devices,Public Safety,Smart City & Home,Smart Transportation,Smartphone,slice Type
0,14,0,1e-06,10,1,0,0,1,0,0,0,0,1,0,0,0,3
1,18,20,0.001,100,0,1,1,0,1,0,0,0,0,0,0,0,1
2,17,14,1e-06,300,0,1,0,1,0,0,0,0,0,0,0,1,1
3,3,17,0.01,100,0,1,0,1,0,0,0,0,0,0,0,1,1
4,9,4,0.01,50,1,0,0,1,0,0,0,0,0,1,0,0,2


In [152]:
plt.figure(figsize = (18, 10))

# IoT Device probabilty with slice type
plt.subplot(1,2,1)
plt.title("Train Data 'IoT Device' probability with 'slice Type'", fontsize = 10)
sns.histplot(data = train, x = 'slice Type', hue = "IoT Devices",
            palette=sns.color_palette("bright", 3), stat = 'probability', alpha = 0.5)

# LTE/5G

plt.subplot(1,2,2)
plt.title("Train Data 'LTE/5G' probability with 'slice Type'",
         fontsize = 10)
sns.histplot(data = train, x = 'slice Type', hue = 'LTE/5G',
            palette='muted', stat = 'probability', alpha = 0.5)
plt.show()

  sns.histplot(data = train, x = 'slice Type', hue = "IoT Devices",


In [175]:
# Find the counts of each slice for GBR
GBR_slice_type = pd.concat([train["GBR"], train['slice Type']], axis = 1)
GBR = pd.DataFrame(GBR_slice_type.value_counts().index.get_level_values(0).values, columns = ['GBR'])
slice_type = pd.DataFrame(GBR_slice_type.value_counts().index.get_level_values(1).values,
                         columns = ['slice Type'])
counts = pd.DataFrame(GBR_slice_type.value_counts().values, columns = ['Counts'])
gbr_slice_type = pd.concat([GBR, slice_type, counts], axis = 1)
print(gbr_slice_type)

   GBR  slice Type  Counts
0    1           1    8422
1    0           1    8377
2    0           3    7392
3    1           2    5512
4    0           2    1880


In [182]:
#LmPlot the GBR with slice Type total counts

sns.lmplot(data = gbr_slice_type, 
           x = "GBR", 
           y = "Counts", 
           hue = "slice Type").set(
    title = "Train data each slice Type counts by 'GBR' feature")

plt.show()

  self._figure.tight_layout(*args, **kwargs)


In [183]:
## Visualize the 'AR/VR/Gaming' with LTE/5G using DisPlot
sns.displot(data = train, x = "AR/VR/Gaming", 
            y = "LTE/5G",hue = 'slice Type', 
            palette = 'bright')

  self._figure.tight_layout(*args, **kwargs)


<seaborn.axisgrid.FacetGrid at 0x7ff2173aa9e0>