In [1]:
# pip install fast_ml

In [2]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn import metrics, svm
from fast_ml.model_development import train_valid_test_split
import warnings
warnings.filterwarnings("ignore")

In [3]:
from sklearn.metrics import recall_score, mean_absolute_error

def recallModel(Y_predicted, Y_true, datasetType: str, model):
    if datasetType == "Categorical":
        return metrics.recall_score(Y_predicted, Y_true, average = 'macro')
    return metrics.r2_score(Y_true, Y_predicted)
    

def runModel(model, X_test, Y_test, datasetType: str): 
    Y_predicted = model.predict(X_test)
    return recallModel (Y_predicted, Y_test,datasetType, model)


In [4]:
# models
def trainCategoricalModel (X,Y):
    return LogisticRegression().fit(X, Y)

def trainCatergoricalModel_ (X,Y):
    return GaussianNB().fit(X, Y)

def trainCatModel (X,Y):
    model  = svm.SVC(kernel='linear')
    return model.fit(X, Y)

def trainContinuousModel (X,Y):
    return LinearRegression().fit(X, Y)

def trainContinuousModel_ (X,Y):
    Y = Y.astype('int')
    model = KNeighborsClassifier(n_neighbors=1)
    return model.fit(X,Y)

def trainConModel(X,Y):
    Y = Y.astype('int')
    model = RandomForestRegressor(n_estimators= 1000, random_state=42)
    return model.fit(X,Y)

In [5]:
# def runModel(model, X_test, Y_test, datasetType: str): 
#     Y_predicted = model.predict(X_test)
#     return recallModel (Y_predicted, Y_test,datasetType, model)


# def runModel(model, X_test, Y_test,datasetType: str): 
#     Y_predicted = model.predict(X_test)
#     return calculateAccuracy (Y_predicted, Y_test,datasetType, model)

In [6]:
def displayInfo (training, edge1, edge2, edge3, edge4):
    print ("Recall on Training Data: ", training)
    print ("Recall on edge1 Data: ", edge1)
    print ("Recall on edge2 Data: ", edge2)
    print ("Recall on edge3 Data: ", edge3)
    print ("Recall on edge4 Data: ", edge4)

In [7]:
def loadData (df,predictName: str,datasetType: str):

    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.4, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.4, random_state=42)
    
    if datasetType == "Categorical":
        model = trainCategoricalModel (model_train_X, model_train_Y)
        
    else:
        model = trainContinuousModel (model_train_X, model_train_Y)
        
        
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = runModel (model, model_train_X, model_train_Y,datasetType), runModel(model,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model,edgeServer4_X, edgeServer4_Y,datasetType)

    return trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy


In [8]:
def loaddata (df,predictName: str,datasetType: str):
    
    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.4, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    if datasetType == "Categorical":
        
        model_a = trainCatergoricalModel_(model_train_X, model_train_Y)
        
    else:
        
        model_a = trainContinuousModel_(model_train_X, model_train_Y)   
    

    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = runModel (model_a, model_train_X, model_train_Y,datasetType), runModel(model_a,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model_a,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model_a,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model_a,edgeServer4_X, edgeServer4_Y,datasetType)
    
    return training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A
    

In [9]:
def load_data (df,predictName: str,datasetType: str):
    
    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.4, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)

    
    if datasetType == "Categorical":
        
        model_b = trainCatModel(model_train_X, model_train_Y)
        
    else:
       
        model_b = trainConModel(model_train_X, model_train_Y)
        
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = runModel (model_b, model_train_X, model_train_Y,datasetType), runModel(model_b,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model_b,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model_b,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model_b,edgeServer4_X, edgeServer4_Y,datasetType) 
    
        
    return training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B


In [10]:
def Occupancy():
    print("-------------- Occupancy Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/Occupancy.csv')
    df.drop(columns = ["date"],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'Occupancy',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'Occupancy',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'Occupancy',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [11]:
def powerConsumption():
    print("-------------- Power Consumption Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/Tetuan City power consumption.csv')
    df.drop(columns = ['DateTime','Zone 2  Power Consumption', 'Zone 3  Power Consumption'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'Zone 1 Power Consumption',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'Zone 1 Power Consumption',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'Zone 1 Power Consumption',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [12]:
def accelerometer_w():
    print("-------------- Accelerometer Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/accelerometer.csv')
#     df.drop(columns = ['wconfid','pctid', 'x', 'y', 'z'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'wconfid',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'wconfid',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'wconfid',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [13]:
def activity_wsdata():
    print("-------------- Wearable Sensor Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/activity data.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'activity',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'activity',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'activity',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [14]:
def ALEdata():
    print("-------------- ALE Sensor Data Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/ALE in Sensor.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'sd_ale',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'sd_ale',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'sd_ale',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [15]:
def banknote_data():
    print("-------------- Bank Note Authentication Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/banknote_authen.txt')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'class',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'class',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'class',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return

In [16]:
def RSSI():
    print("-------------- BLE RSSI Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/BLE RSSI.csv')
    df.drop(columns = ['name'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'locationStatus', "Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'locationStatus', "Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B  = load_data (df,'locationStatus', "Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [17]:
def e_grid():
    print("-------------- Simulated Electrical Grid Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/UCI_named.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'stabf', 'Categorical')
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'stabf', 'Categorical')
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'stabf', 'Categorical')
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return  

In [18]:
# if __name__=="__main__":
#     Occupancy()
#     powerConsumption()
#     accelerometer_w()
#     activity_wsdata()
#     ALEdata()
#     banknote_data()
#     RSSI()
#     e_grid()

In [19]:
Occupancy()

-------------- Occupancy Dataset Info -------------------- 
Recall on Training Data:  0.9742626894340249
Recall on edge1 Data:  0.9826525851588015
Recall on edge2 Data:  0.9805667539905484
Recall on edge3 Data:  0.9849098297443198
Recall on edge4 Data:  0.9793891496366611
------------------------------------------------------------
Recall on Training Data:  0.9364947338137272
Recall on edge1 Data:  0.9427749166425125
Recall on edge2 Data:  0.9372447082037307
Recall on edge3 Data:  0.9427749166425125
Recall on edge4 Data:  0.9372447082037307
------------------------------------------------------------
Recall on Training Data:  0.9741139562930157
Recall on edge1 Data:  0.9827699386392282
Recall on edge2 Data:  0.9800412620750669
Recall on edge3 Data:  0.9827699386392282
Recall on edge4 Data:  0.9800412620750669


In [20]:
powerConsumption()

-------------- Power Consumption Dataset Info -------------------- 
Recall on Training Data:  0.2027594046484852
Recall on edge1 Data:  0.20574927510387442
Recall on edge2 Data:  0.21342267738051168
Recall on edge3 Data:  0.2039750439211221
Recall on edge4 Data:  0.21330338109370983
------------------------------------------------------------
Recall on Training Data:  0.999999993588981
Recall on edge1 Data:  -0.05201312579924089
Recall on edge2 Data:  -0.0484122752946603
Recall on edge3 Data:  -0.05201312579924089
Recall on edge4 Data:  -0.0484122752946603
------------------------------------------------------------
Recall on Training Data:  0.9344843684236297
Recall on edge1 Data:  0.5064163270474468
Recall on edge2 Data:  0.5152111463112402
Recall on edge3 Data:  0.5064163270474468
Recall on edge4 Data:  0.5152111463112402


In [21]:
accelerometer_w()

-------------- Accelerometer Dataset Info -------------------- 
Recall on Training Data:  7.65210790154569e-05
Recall on edge1 Data:  5.6376002772418055e-05
Recall on edge2 Data:  -0.00014952806744905622
Recall on edge3 Data:  3.07991945365238e-05
Recall on edge4 Data:  -1.889229437024298e-05
------------------------------------------------------------
Recall on Training Data:  0.9531800745860894
Recall on edge1 Data:  0.25144335092716896
Recall on edge2 Data:  0.2501146278536095
Recall on edge3 Data:  0.25144335092716896
Recall on edge4 Data:  0.2501146278536095
------------------------------------------------------------
Recall on Training Data:  0.922768543738738
Recall on edge1 Data:  0.5500364104947754
Recall on edge2 Data:  0.5550193240880725
Recall on edge3 Data:  0.5500364104947754
Recall on edge4 Data:  0.5550193240880725


In [22]:
activity_wsdata()

-------------- Wearable Sensor Dataset Info -------------------- 
Recall on Training Data:  0.997983870967742
Recall on edge1 Data:  0.6639784946236559
Recall on edge2 Data:  0.32798573975044565
Recall on edge3 Data:  0.6632996632996634
Recall on edge4 Data:  0.32887899034892354
------------------------------------------------------------
Recall on Training Data:  1.0
Recall on edge1 Data:  0.6648697214734951
Recall on edge2 Data:  0.3288650580875782
Recall on edge3 Data:  0.6648697214734951
Recall on edge4 Data:  0.3288650580875782
------------------------------------------------------------
Recall on Training Data:  1.0
Recall on edge1 Data:  0.5
Recall on edge2 Data:  0.4148648648648649
Recall on edge3 Data:  0.5
Recall on edge4 Data:  0.4148648648648649


In [23]:
ALEdata()

-------------- ALE Sensor Data Dataset Info -------------------- 
Recall on Training Data:  0.684419755654035
Recall on edge1 Data:  0.7009833823571244
Recall on edge2 Data:  0.40898865597025014
Recall on edge3 Data:  0.6914464514462756
Recall on edge4 Data:  0.4833816592230403
------------------------------------------------------------
Recall on Training Data:  -2.204074310664261
Recall on edge1 Data:  -1.6997869395144232
Recall on edge2 Data:  -2.748503755022698
Recall on edge3 Data:  -1.6997869395144232
Recall on edge4 Data:  -2.748503755022698
------------------------------------------------------------
Recall on Training Data:  -2.204074310664261
Recall on edge1 Data:  -1.6997869395144232
Recall on edge2 Data:  -2.748503755022698
Recall on edge3 Data:  -1.6997869395144232
Recall on edge4 Data:  -2.748503755022698


In [24]:
banknote_data()

-------------- Bank Note Authentication Dataset Info -------------------- 
Recall on Training Data:  0.9904751712328768
Recall on edge1 Data:  0.9917127071823204
Recall on edge2 Data:  0.9873563218390804
Recall on edge3 Data:  0.9964788732394366
Recall on edge4 Data:  0.9849612864800477
------------------------------------------------------------
Recall on Training Data:  0.8491612554112553
Recall on edge1 Data:  0.8531426318772021
Recall on edge2 Data:  0.8104172703564184
Recall on edge3 Data:  0.8531426318772021
Recall on edge4 Data:  0.8104172703564184
------------------------------------------------------------
Recall on Training Data:  0.9922178988326849
Recall on edge1 Data:  0.9917127071823204
Recall on edge2 Data:  0.9873563218390804
Recall on edge3 Data:  0.9917127071823204
Recall on edge4 Data:  0.9873563218390804


In [None]:
RSSI()

-------------- BLE RSSI Dataset Info -------------------- 


In [None]:
e_grid()