In [1]:
# pip install fast_ml

In [2]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn import metrics, svm
from fast_ml.model_development import train_valid_test_split
import warnings
warnings.filterwarnings("ignore")

In [3]:
def displayInfo (training, edge1, edge2, edge3, edge4):
    print ("Precision on Training Data: ", training)
    print ("Precision on edge1 Data: ", edge1)
    print ("Precision on edge2 Data: ", edge2)
    print ("Precision on edge3 Data: ", edge3)
    print ("Precision on edge4 Data: ", edge4)

In [4]:
def calculateAccuracy (Y_predicted, Y_true, datasetType: str, model):
    if datasetType == "Categorical":
        return metrics.precision_score(Y_true, Y_predicted,average='macro')
    return metrics.mean_squared_error(Y_true, Y_predicted)

In [5]:
# models
def trainCategoricalModel (X,Y):
    return LogisticRegression().fit(X, Y)

def trainCatergoricalModel_ (X,Y):
    return GaussianNB().fit(X, Y)

def trainCatModel (X,Y):
    model  = svm.SVC(kernel='linear')
    return model.fit(X, Y)

def trainContinuousModel (X,Y):
    return LinearRegression().fit(X, Y)

def trainContinuousModel_ (X,Y):
    Y = Y.astype('int')
    model = KNeighborsClassifier(n_neighbors=1)
    return model.fit(X,Y)

def trainConModel(X,Y):
    Y = Y.astype('int')
    model = RandomForestRegressor(n_estimators= 1000, random_state=42)
    return model.fit(X,Y)

In [6]:
def runModel(model, X_test, Y_test,datasetType: str): 
    Y_predicted = model.predict(X_test)
    return calculateAccuracy (Y_predicted, Y_test,datasetType, model)

In [7]:
def loadData (df,predictName: str,datasetType: str):

    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.5, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    if datasetType == "Categorical":
        model = trainCategoricalModel (model_train_X, model_train_Y)
        
    else:
        model = trainContinuousModel (model_train_X, model_train_Y)
        
        
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = runModel (model, model_train_X, model_train_Y,datasetType), runModel(model,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model,edgeServer4_X, edgeServer4_Y,datasetType)

    return trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy


In [8]:
def loaddata (df,predictName: str,datasetType: str):
    
    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.5, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    if datasetType == "Categorical":
        
        model_a = trainCatergoricalModel_(model_train_X, model_train_Y)
        
    else:
        
        model_a = trainContinuousModel_(model_train_X, model_train_Y)   
    

    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = runModel (model_a, model_train_X, model_train_Y,datasetType), runModel(model_a,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model_a,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model_a,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model_a,edgeServer4_X, edgeServer4_Y,datasetType)
    
    return training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A
    

In [9]:
def load_data (df,predictName: str,datasetType: str):
    
    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.5, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)

    
    if datasetType == "Categorical":
        
        model_b = trainCatModel(model_train_X, model_train_Y)
        
    else:
       
        model_b = trainConModel(model_train_X, model_train_Y)
        
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = runModel (model_b, model_train_X, model_train_Y,datasetType), runModel(model_b,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model_b,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model_b,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model_b,edgeServer4_X, edgeServer4_Y,datasetType) 
    
        
    return training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B


In [10]:
def Occupancy():
    print("-------------- Occupancy Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/Occupancy.csv')
    df.drop(columns = ["date"],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'Occupancy',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'Occupancy',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'Occupancy',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [11]:
def powerConsumption():
    print("-------------- Power Consumption Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/Tetuan City power consumption.csv')
    df.drop(columns = ['DateTime','Zone 2  Power Consumption', 'Zone 3  Power Consumption'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'Zone 1 Power Consumption',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'Zone 1 Power Consumption',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'Zone 1 Power Consumption',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [12]:
def accelerometer_w():
    print("-------------- Accelerometer Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/accelerometer.csv')
#     df.drop(columns = ['wconfid','pctid', 'x', 'y', 'z'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'wconfid',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'wconfid',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'wconfid',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [13]:
def activity_wsdata():
    print("-------------- Wearable Sensor Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/activity data.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'activity',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'activity',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'activity',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [14]:
def ALEdata():
    print("-------------- ALE Sensor Data Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/ALE in Sensor.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'sd_ale',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'sd_ale',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'sd_ale',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [15]:
def banknote_data():
    print("-------------- Bank Note Authentication Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/banknote_authen.txt')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'class',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'class',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'class',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return

In [16]:
def RSSI():
    print("-------------- BLE RSSI Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/BLE RSSI.csv')
    df.drop(columns = ['name'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'locationStatus', "Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'locationStatus', "Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B  = load_data (df,'locationStatus', "Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [17]:
def e_grid():
    print("-------------- Simulated Electrical Grid Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/UCI_named.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'stabf', 'Categorical')
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'stabf', 'Categorical')
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'stabf', 'Categorical')
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return  

In [18]:
# if __name__=="__main__":
#     Occupancy()
#     powerConsumption()
#     accelerometer_w()
#     activity_wsdata()
#     ALEdata()
#     banknote_data()
#     RSSI()
#     e_grid()

In [19]:
Occupancy()

-------------- Occupancy Dataset Info -------------------- 
Precision on Training Data:  0.9756555008005323
Precision on edge1 Data:  0.9830036196897478
Precision on edge2 Data:  0.9796027053755149
Precision on edge3 Data:  0.9830036196897478
Precision on edge4 Data:  0.9796027053755149
------------------------------------------------------------
Precision on Training Data:  0.9368955579765137
Precision on edge1 Data:  0.9411054832628709
Precision on edge2 Data:  0.9347832627132062
Precision on edge3 Data:  0.9411054832628709
Precision on edge4 Data:  0.9347832627132062
------------------------------------------------------------
Precision on Training Data:  0.9753418782709702
Precision on edge1 Data:  0.9832871713493903
Precision on edge2 Data:  0.9788032060107311
Precision on edge3 Data:  0.9832871713493903
Precision on edge4 Data:  0.9788032060107311


In [20]:
powerConsumption()

-------------- Power Consumption Dataset Info -------------------- 
Precision on Training Data:  40438444.12822876
Precision on edge1 Data:  39916750.83245769
Precision on edge2 Data:  40454315.68410782
Precision on edge3 Data:  39916750.83245769
Precision on edge4 Data:  40454315.68410782
------------------------------------------------------------
Precision on Training Data:  0.3239834486681542
Precision on edge1 Data:  51887803.65204311
Precision on edge2 Data:  51541936.14630976
Precision on edge3 Data:  51887803.65204311
Precision on edge4 Data:  51541936.14630976
------------------------------------------------------------
Precision on Training Data:  3189960.186173851
Precision on edge1 Data:  23260268.067719262
Precision on edge2 Data:  24546895.81858039
Precision on edge3 Data:  23260268.067719262
Precision on edge4 Data:  24546895.81858039


In [21]:
accelerometer_w()

-------------- Accelerometer Dataset Info -------------------- 
Precision on Training Data:  0.6667663049192497
Precision on edge1 Data:  0.6674918030989705
Precision on edge2 Data:  0.6655160560526169
Precision on edge3 Data:  0.6674918030989705
Precision on edge4 Data:  0.6655160560526169
------------------------------------------------------------
Precision on Training Data:  0.03669281045751634
Precision on edge1 Data:  0.5035032679738562
Precision on edge2 Data:  0.49349019607843136
Precision on edge3 Data:  0.5035032679738562
Precision on edge4 Data:  0.49349019607843136
------------------------------------------------------------
Precision on Training Data:  0.05342808817381868
Precision on edge1 Data:  0.300948725224751
Precision on edge2 Data:  0.29286757439532735
Precision on edge3 Data:  0.300948725224751
Precision on edge4 Data:  0.29286757439532735


In [22]:
activity_wsdata()

-------------- Wearable Sensor Dataset Info -------------------- 
Precision on Training Data:  0.998389694041868
Precision on edge1 Data:  0.32690246516613075
Precision on edge2 Data:  0.6634408602150538
Precision on edge3 Data:  0.32690246516613075
Precision on edge4 Data:  0.6634408602150538
------------------------------------------------------------
Precision on Training Data:  1.0
Precision on edge1 Data:  0.32690246516613075
Precision on edge2 Data:  0.5544733044733045
Precision on edge3 Data:  0.32690246516613075
Precision on edge4 Data:  0.5544733044733045
------------------------------------------------------------
Precision on Training Data:  1.0
Precision on edge1 Data:  0.4144951140065147
Precision on edge2 Data:  0.5
Precision on edge3 Data:  0.4144951140065147
Precision on edge4 Data:  0.5


In [23]:
ALEdata()

-------------- ALE Sensor Data Dataset Info -------------------- 
Precision on Training Data:  0.010143001773508614
Precision on edge1 Data:  0.011910099917907448
Precision on edge2 Data:  0.014101075379066864
Precision on edge3 Data:  0.011910099917907448
Precision on edge4 Data:  0.014101075379066864
------------------------------------------------------------
Precision on Training Data:  0.09868112242844222
Precision on edge1 Data:  0.11302978279327824
Precision on edge2 Data:  0.10505958849787611
Precision on edge3 Data:  0.11302978279327824
Precision on edge4 Data:  0.10505958849787611
------------------------------------------------------------
Precision on Training Data:  0.09868112242844222
Precision on edge1 Data:  0.11302978279327824
Precision on edge2 Data:  0.10505958849787611
Precision on edge3 Data:  0.11302978279327824
Precision on edge4 Data:  0.10505958849787611


In [24]:
banknote_data()

-------------- Bank Note Authentication Dataset Info -------------------- 
Precision on Training Data:  0.9891284815813117
Precision on edge1 Data:  0.987070770519263
Precision on edge2 Data:  0.994132740335272
Precision on edge3 Data:  0.987070770519263
Precision on edge4 Data:  0.994132740335272
------------------------------------------------------------
Precision on Training Data:  0.8413490347289164
Precision on edge1 Data:  0.8469948298147351
Precision on edge2 Data:  0.8171768707482994
Precision on edge3 Data:  0.8469948298147351
Precision on edge4 Data:  0.8171768707482994
------------------------------------------------------------
Precision on Training Data:  0.9863938877420791
Precision on edge1 Data:  0.9845104895104895
Precision on edge2 Data:  0.994132740335272
Precision on edge3 Data:  0.9845104895104895
Precision on edge4 Data:  0.994132740335272


In [25]:
e_grid()

-------------- Simulated Electrical Grid Dataset Info -------------------- 
Precision on Training Data:  0.8711957062466787
Precision on edge1 Data:  0.8608095520846413
Precision on edge2 Data:  0.8697828261478557
Precision on edge3 Data:  0.8608095520846413
Precision on edge4 Data:  0.8697828261478557
------------------------------------------------------------
Precision on Training Data:  0.98038389923226
Precision on edge1 Data:  0.9652430988742853
Precision on edge2 Data:  0.9756678754408143
Precision on edge3 Data:  0.9652430988742853
Precision on edge4 Data:  0.9756678754408143
------------------------------------------------------------
Precision on Training Data:  0.9261458518881367
Precision on edge1 Data:  0.9185247889725394
Precision on edge2 Data:  0.9282034874420093
Precision on edge3 Data:  0.9185247889725394
Precision on edge4 Data:  0.9282034874420093


In [26]:
# e_grid()