In [1]:
import torch
import pandas as pd
import numpy as np
import sklearn

# Select the compute device: CUDA when torch reports it as available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [13]:
# Import Datasets
#
# Every file is read from the relative 'Dataset/' directory, so the notebook
# must be run from the directory that contains it.

rai_old = pd.read_csv('Dataset/rai_old_share.csv') #Release Assistance Indicator
rai_new = pd.read_csv('Dataset/rai_new_share.csv')
rtt = pd.read_csv('Dataset/rtt_share.csv') #Round Trip Time
psm = pd.read_csv('Dataset/PSM_share.csv') #inactive periods of PTW / DRX or PSM
connected = pd.read_csv('Dataset/connected_share.csv') #device waits duration of inactivity timer before going IDLE
idrx_on = pd.read_csv('Dataset/IDRX_ON_share.csv') #listen for paging occasions during IDLE mode DRX.

# Merge rai datasets together

print(rai_old.shape, rai_new.shape)
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat with ignore_index=True produces the same frame: the rows of
# rai_new followed by the rows of rai_old, renumbered 0..n-1.
rai = pd.concat([rai_new, rai_old], ignore_index=True)
print(rai.shape)

(607, 32) (1908, 32)
(2515, 32)


In [3]:
# Check for Nan Values
#
# For a per-column view of which columns contain NaN, run e.g.
# rai.isnull().any() (likewise for rtt, psm, connected, idrx_on).

# Sentinel written into every missing cell of the columns listed below.
FILL_VALUE = -1


def fill_missing(frame, columns, value=FILL_VALUE):
    """Replace NaN with `value` in each of the named columns of `frame`.

    The frame is modified in place, one column at a time; columns that are
    not listed are left untouched.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to clean.
    columns : iterable of str
        Names of the columns to fill. Each must exist in `frame`.
    value : scalar, default FILL_VALUE
        Replacement for missing entries.
    """
    for column in columns:
        frame[column] = frame[column].fillna(value)


def count_missing(frame):
    """Return the total number of NaN cells in `frame` (summed over all columns)."""
    return int(frame.isnull().sum().sum())


# Clean RAI NaN Values

# Encode the textual coverage labels as integers. Values other than these
# three labels are left as they are.
for label, code in {'Good': 1, 'Okay': 0, 'Bad': -1}.items():
    rai.loc[rai['Location Coverage'] == label, 'Location Coverage'] = code

# NOTE(review): a missing 'Location Coverage' is filled with -1 below, which is
# also the code assigned to 'Bad' above, so the two cases cannot be told apart
# afterwards. Confirm that this is intended.
fill_missing(rai, [
    'tx_power_median', 'rssi_median', 'rtt_median', 'rsrp_median', 'ECL_median',
    'interval', 'active_timer', 'tau', 'Location Coverage', 'ExpectedResponse',
    'timestampUETransmission', 'failure_direction', 'energyConsumptionTx', 'msTx',
    'Throughput', 'timestampServerCapture',
])

# The reports below count NaN cells. The earlier form, isnull().any().sum(),
# counted the number of columns containing a NaN, which did not match the label.
print('RAI NaN Values: {}'.format(count_missing(rai)))

# Clean RTT NaN Values

fill_missing(rtt, [
    'rtt', 'ExpectedResponse', 'timestampUETransmission', 'timestampServerCapture',
    'active_timer', 'note', 'tau',
])

print('RTT NaN Values: {}'.format(count_missing(rtt)))

# Clean PSM NaN Values

fill_missing(psm, [
    'current_max', 'rsrp_mostCommon', 'ECL_mostCommon', 'active_timer', 'tau', 'note',
])

print('PSM NaN Values: {}'.format(count_missing(psm)))

# Clean Connected NaN Values

fill_missing(connected, [
    'tx_power_median', 'rssi_median', 'rtt_median', 'rsrp_median', 'ECL_median',
    'energyConsumptionTx', 'msTx', 'active_timer', 'tau', 'note', 'Throughput',
    'ExpectedResponse', 'timestampUETransmission', 'timestampServerCapture',
    'failure_direction',
])

print('Connected NaN Values: {}'.format(count_missing(connected)))

# Clean Idrx_on NaN Values

fill_missing(idrx_on, [
    'snr_mostCommon', 'ECL_mostCommon', 'rsrp_mostCommon', 'rsrq_mostCommon',
    'active_timer', 'tau', 'note',
])

print('IDRX_On NaN Values: {}'.format(count_missing(idrx_on)))

RAI NaN Values: 0
RTT NaN Values: 0
PSM NaN Values: 0
Connected NaN Values: 0
IDRX_On NaN Values: 0


In [4]:
def drop_columns(frame, label, columns):
    """Drop `columns` from `frame` in place, printing its shape and columns before and after.

    Only columns that are currently present are dropped, so re-running the
    cell after the columns are already gone does not raise KeyError. Any
    requested column that is absent is reported rather than silently ignored,
    so a misspelled name is still visible.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to trim; modified in place.
    label : str
        Dataset name used as the prefix of the printed lines.
    columns : list of str
        Names of the columns to remove.
    """
    print('{} shape {}: Index Values = {}'.format(label, frame.shape, frame.columns.values))
    present = [name for name in columns if name in frame.columns]
    absent = [name for name in columns if name not in frame.columns]
    if absent:
        print('{}: columns not present, skipped: {}'.format(label, absent))
    # `columns=` already selects the axis; the former `axis=0` argument was
    # ignored by pandas and contradicted the intent, so it is omitted.
    frame.drop(columns=present, inplace=True)
    print('{} shape {}: Index Values = {}'.format(label, frame.shape, frame.columns.values))


# Drop Unnecessary RAI Columns

drop_columns(rai, 'RAI', [
    'measurement_id', 'ECL_median', 'ExpectedResponse', 'con_idle_stateNumber',
    'code', 'state', 'module', 'operator', 'note', 'operator_humanReadable',
    'site', 'date',
])

# Drop Unnecessary RTT Columns

drop_columns(rtt, 'RTT', [
    'measurement_id', 'startTime', 'timestampUETransmission', 'timestampServerCapture',
    'ExpectedResponse', 'con_idle_stateNumber', 'module', 'operator', 'note',
    'operator_humanReadable', 'site',
])

# Drop Unnecessary PSM Columns

drop_columns(psm, 'PSM', [
    'measurement_id', 'con_idle_stateNumber', 'current_max', 'rsrp_mostCommon',
    'ECL_mostCommon', 'state', 'module', 'operator', 'note',
    'operator_humanReadable', 'site', 'date',
])

# Drop Unnecessary Connected Columns

drop_columns(connected, 'Connected', [
    'measurement_id', 'current_max', 'timestampServerCapture', 'timestampUETransmission',
    'ExpectedResponse', 'ECL_median', 'rsrp_median', 'rtt_median', 'rssi_median',
    'con_idle_stateNumber', 'state', 'module', 'operator', 'note',
    'operator_humanReadable', 'site', 'date',
])

# Drop Unnecessary IDRX_On Columns

drop_columns(idrx_on, 'IDRX', [
    'measurement_id', 'con_idle_stateNumber', 'current_max', 'snr_mostCommon',
    'ECL_mostCommon', 'rsrp_mostCommon', 'rsrq_mostCommon', 'state', 'module',
    'operator', 'note', 'operator_humanReadable', 'site', 'date',
])

RAI shape (2515, 32): Index Values = ['measurement_id' 'con_idle_stateNumber' 'state' 'energyConsumption' 'ms'
 'tx_power_median' 'current_max' 'rssi_median' 'rtt_median' 'rsrp_median'
 'ECL_median' 'code' 'Location Coverage' 'ExpectedResponse'
 'timestampUETransmission' 'timestampServerCapture' 'failure_direction'
 'energyConsumptionTx' 'msTx' 'date' 'duration' 'module' 'operator'
 'interval' 'psize' 'site' 'active_timer' 'tau' 'note'
 'operator_humanReadable' 'Packet Size [Bytes]' 'Throughput']
RAI shape (2515, 20): Index Values = ['energyConsumption' 'ms' 'tx_power_median' 'current_max' 'rssi_median'
 'rtt_median' 'rsrp_median' 'Location Coverage' 'timestampUETransmission'
 'timestampServerCapture' 'failure_direction' 'energyConsumptionTx' 'msTx'
 'duration' 'interval' 'psize' 'active_timer' 'tau' 'Packet Size [Bytes]'
 'Throughput']
RTT shape (21458, 19): Index Values = ['measurement_id' 'con_idle_stateNumber' 'rtt' 'ExpectedResponse'
 'timestampUETransmission' 'timestampServerCapt

In [5]:
#have latency and energy consumption as target values to be predicted


#Create tensor of shape [input,output]


#Split data into training, testing and validation sets


#Test with k-means/random-forest/Decision Tree/Stochastic Gradient Descent/Naive Bayes