In [1]:
# pip install fast_ml

In [2]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn import metrics, svm
from fast_ml.model_development import train_valid_test_split
import warnings
warnings.filterwarnings("ignore")

In [3]:
from sklearn.metrics import f1_score

def fModel(Y_predicted, Y_true, datasetType: str, model):
    if datasetType == "Categorical":
        return metrics.f1_score(Y_predicted, Y_true, average = 'macro')
    return metrics.mean_squared_error(Y_true, Y_predicted)


def runModel(model, X_test, Y_test, datasetType: str): 
    Y_predicted = model.predict(X_test)
    return fModel(Y_predicted, Y_test,datasetType, model)

In [4]:
# models
def trainCategoricalModel (X,Y):
    return LogisticRegression().fit(X, Y)

def trainCatergoricalModel_ (X,Y):
    return GaussianNB().fit(X, Y)

def trainCatModel (X,Y):
    model  = svm.SVC(kernel='linear')
    return model.fit(X, Y)

def trainContinuousModel (X,Y):
    return LinearRegression().fit(X, Y)

def trainContinuousModel_ (X,Y):
    Y = Y.astype('int')
    model = KNeighborsClassifier(n_neighbors=1)
    return model.fit(X,Y)

def trainConModel(X,Y):
    Y = Y.astype('int')
    model = RandomForestRegressor(n_estimators= 1000, random_state=42)
    return model.fit(X,Y)

In [5]:
def displayInfo (training, edge1, edge2, edge3, edge4):
    print ("F1 Score on Training Data: ", training)
    print ("F1 Score on edge1 Data: ", edge1)
    print ("F1 Score on edge2 Data: ", edge2)
    print ("F1 Score on edge3 Data: ", edge3)
    print ("F1 Score on edge4 Data: ", edge4)

In [6]:
def loadData (df,predictName: str,datasetType: str):

    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.4, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    if datasetType == "Categorical":
        model = trainCategoricalModel (model_train_X, model_train_Y)
        
    else:
        model = trainContinuousModel (model_train_X, model_train_Y)
        
        
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = runModel (model, model_train_X, model_train_Y,datasetType), runModel(model,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model,edgeServer4_X, edgeServer4_Y,datasetType)

    return trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy


In [7]:
def loaddata (df,predictName: str,datasetType: str):
    
    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.4, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    if datasetType == "Categorical":
        
        model_a = trainCatergoricalModel_(model_train_X, model_train_Y)
        
    else:
        
        model_a = trainContinuousModel_(model_train_X, model_train_Y)   
    

    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = runModel (model_a, model_train_X, model_train_Y,datasetType), runModel(model_a,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model_a,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model_a,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model_a,edgeServer4_X, edgeServer4_Y,datasetType)
    
    return training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A
    

In [8]:
def load_data (df,predictName: str,datasetType: str):
    
    X = df.drop(columns=[predictName])  # Features
    y = df[predictName]                # Target
    
    # First split: Split data into two halves (50% training and 50% for further splits)
    model_train_X, X_remaining, model_train_Y, y_remaining = train_test_split(X, y, train_size=0.4, random_state=42)
    
    # Second split: Split the remaining data into 2 (50% for edge server 1 and 50% for edge server 2)
    edgeServer1_X, edgeServer2_X, edgeServer1_Y, edgeServer2_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)
    
    # Third split: Split the remaining data into 2 (50% for edge server 3 and 50% for edge server 4)
    edgeServer3_X, edgeServer4_X, edgeServer3_Y, edgeServer4_Y = train_test_split(X_remaining, y_remaining, train_size=0.5, random_state=42)

    
    if datasetType == "Categorical":
        
        model_b = trainCatModel(model_train_X, model_train_Y)
        
    else:
       
        model_b = trainConModel(model_train_X, model_train_Y)
        
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = runModel (model_b, model_train_X, model_train_Y,datasetType), runModel(model_b,edgeServer1_X, edgeServer1_Y,datasetType), runModel(model_b,edgeServer2_X, edgeServer2_Y,datasetType), runModel(model_b,edgeServer3_X, edgeServer3_Y,datasetType), runModel(model_b,edgeServer4_X, edgeServer4_Y,datasetType) 
    
        
    return training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B


In [9]:
def Occupancy():
    print("-------------- Occupancy Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/Occupancy.csv')
    df.drop(columns = ["date"],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'Occupancy',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'Occupancy',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'Occupancy',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [10]:
def powerConsumption():
    print("-------------- Power Consumption Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/Tetuan City power consumption.csv')
    df.drop(columns = ['DateTime','Zone 2  Power Consumption', 'Zone 3  Power Consumption'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'Zone 1 Power Consumption',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'Zone 1 Power Consumption',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'Zone 1 Power Consumption',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [11]:
def accelerometer_w():
    print("-------------- Accelerometer Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/accelerometer.csv')
#     df.drop(columns = ['wconfid','pctid', 'x', 'y', 'z'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'wconfid',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'wconfid',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'wconfid',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [12]:
def activity_wsdata():
    print("-------------- Wearable Sensor Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/activity data.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'activity',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'activity',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'activity',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [13]:
def ALEdata():
    print("-------------- ALE Sensor Data Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/ALE in Sensor.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'sd_ale',"Continuous")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'sd_ale',"Continuous")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'sd_ale',"Continuous")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [14]:
def banknote_data():
    print("-------------- Bank Note Authentication Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/banknote_authen.txt')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'class',"Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A = loaddata (df,'class',"Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'class',"Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return

In [15]:
def RSSI():
    print("-------------- BLE RSSI Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/BLE RSSI.csv')
    df.drop(columns = ['name'],inplace = True)
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'locationStatus', "Categorical")
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'locationStatus', "Categorical")
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B  = load_data (df,'locationStatus', "Categorical")
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return 

In [16]:
def e_grid():
    print("-------------- Simulated Electrical Grid Dataset Info -------------------- ")
    df = pd.read_csv('C:/Users/moyin/Downloads/data/UCI_named.csv')
    trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy = loadData(df,'stabf', 'Categorical')
    training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A  = loaddata (df,'stabf', 'Categorical')
    training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B = load_data (df,'stabf', 'Categorical')
    displayInfo(trainingAccuacy, edgeServer1Accuracy, edgeServer2Accuracy, edgeServer3Accuracy, edgeServer4Accuracy)
    print("------------------------------------------------------------")
    displayInfo(training_A, edgeServer1_A, edgeServer2_A, edgeServer3_A, edgeServer4_A)
    print("------------------------------------------------------------")
    displayInfo(training_B, edgeServer1_B, edgeServer2_B, edgeServer3_B, edgeServer4_B)
    return  

In [17]:
# if __name__=="__main__":
#     Occupancy()
#     powerConsumption()
#     accelerometer_w()
#     activity_wsdata()
#     ALEdata()
#     banknote_data()
#     RSSI()
#     e_grid()

In [18]:
Occupancy()

-------------- Occupancy Dataset Info -------------------- 
F1 Score on Training Data:  0.9819942364636209
F1 Score on edge1 Data:  0.9869577676135979
F1 Score on edge2 Data:  0.9861240265635398
F1 Score on edge3 Data:  0.9869577676135979
F1 Score on edge4 Data:  0.9861240265635398
------------------------------------------------------------
F1 Score on Training Data:  0.9548665056633183
F1 Score on edge1 Data:  0.9593022109065386
F1 Score on edge2 Data:  0.9550172947293115
F1 Score on edge3 Data:  0.9593022109065386
F1 Score on edge4 Data:  0.9550172947293115
------------------------------------------------------------
F1 Score on Training Data:  0.9820007543733461
F1 Score on edge1 Data:  0.9871856773037787
F1 Score on edge2 Data:  0.9859101485155426
F1 Score on edge3 Data:  0.9871856773037787
F1 Score on edge4 Data:  0.9859101485155426


In [19]:
powerConsumption()

-------------- Power Consumption Dataset Info -------------------- 
F1 Score on Training Data:  40321075.89790141
F1 Score on edge1 Data:  40287870.72399557
F1 Score on edge2 Data:  40352702.26927748
F1 Score on edge3 Data:  40287870.72399557
F1 Score on edge4 Data:  40352702.26927748
------------------------------------------------------------
F1 Score on Training Data:  0.32424237774569675
F1 Score on edge1 Data:  53362707.11958031
F1 Score on edge2 Data:  53785263.2968509
F1 Score on edge3 Data:  53362707.11958031
F1 Score on edge4 Data:  53785263.2968509
------------------------------------------------------------
F1 Score on Training Data:  3313505.068222254
F1 Score on edge1 Data:  25036722.767848965
F1 Score on edge2 Data:  24870460.55589165
F1 Score on edge3 Data:  25036722.767848965
F1 Score on edge4 Data:  24870460.55589165


In [20]:
accelerometer_w()

-------------- Accelerometer Dataset Info -------------------- 
F1 Score on Training Data:  0.6665273789536503
F1 Score on edge1 Data:  0.6679144626099887
F1 Score on edge2 Data:  0.6655025745497011
F1 Score on edge3 Data:  0.6679144626099887
F1 Score on edge4 Data:  0.6655025745497011
------------------------------------------------------------
F1 Score on Training Data:  0.031209150326797386
F1 Score on edge1 Data:  0.5
F1 Score on edge2 Data:  0.4989760348583878
F1 Score on edge3 Data:  0.5
F1 Score on edge4 Data:  0.4989760348583878
------------------------------------------------------------
F1 Score on Training Data:  0.05148081948244786
F1 Score on edge1 Data:  0.30055413311908563
F1 Score on edge2 Data:  0.2960915114527583
F1 Score on edge3 Data:  0.30055413311908563
F1 Score on edge4 Data:  0.2960915114527583


In [21]:
activity_wsdata()

-------------- Wearable Sensor Dataset Info -------------------- 
F1 Score on Training Data:  0.748989898989899
F1 Score on edge1 Data:  0.5542060278902384
F1 Score on edge2 Data:  0.3306379155435759
F1 Score on edge3 Data:  0.5542060278902384
F1 Score on edge4 Data:  0.3306379155435759
------------------------------------------------------------
F1 Score on Training Data:  1.0
F1 Score on edge1 Data:  0.6657657657657657
F1 Score on edge2 Data:  0.3310841205578048
F1 Score on edge3 Data:  0.6657657657657657
F1 Score on edge4 Data:  0.3310841205578048
------------------------------------------------------------
F1 Score on Training Data:  1.0
F1 Score on edge1 Data:  0.5555555555555555
F1 Score on edge2 Data:  0.42766808620467156
F1 Score on edge3 Data:  0.5555555555555555
F1 Score on edge4 Data:  0.42766808620467156


In [22]:
ALEdata()

-------------- ALE Sensor Data Dataset Info -------------------- 
F1 Score on Training Data:  0.009856677104670638
F1 Score on edge1 Data:  0.013456816495941125
F1 Score on edge2 Data:  0.014464094915140807
F1 Score on edge3 Data:  0.013456816495941125
F1 Score on edge4 Data:  0.014464094915140807
------------------------------------------------------------
F1 Score on Training Data:  0.10007447064704568
F1 Score on edge1 Data:  0.12150006146673337
F1 Score on edge2 Data:  0.09173887210476422
F1 Score on edge3 Data:  0.12150006146673337
F1 Score on edge4 Data:  0.09173887210476422
------------------------------------------------------------
F1 Score on Training Data:  0.10007447064704568
F1 Score on edge1 Data:  0.12150006146673337
F1 Score on edge2 Data:  0.09173887210476422
F1 Score on edge3 Data:  0.12150006146673337
F1 Score on edge4 Data:  0.09173887210476422


In [23]:
banknote_data()

-------------- Bank Note Authentication Dataset Info -------------------- 
F1 Score on Training Data:  0.9908294648624587
F1 Score on edge1 Data:  0.9925959205678857
F1 Score on edge2 Data:  0.9876598676131428
F1 Score on edge3 Data:  0.9925959205678857
F1 Score on edge4 Data:  0.9876598676131428
------------------------------------------------------------
F1 Score on Training Data:  0.8469989471170181
F1 Score on edge1 Data:  0.8474101868211266
F1 Score on edge2 Data:  0.8091938675471994
F1 Score on edge3 Data:  0.8474101868211266
F1 Score on edge4 Data:  0.8091938675471994
------------------------------------------------------------
F1 Score on Training Data:  0.9926654620892725
F1 Score on edge1 Data:  0.9925959205678857
F1 Score on edge2 Data:  0.9876598676131428
F1 Score on edge3 Data:  0.9925959205678857
F1 Score on edge4 Data:  0.9876598676131428


In [None]:
RSSI()

-------------- BLE RSSI Dataset Info -------------------- 


In [None]:
e_grid()