In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import sys
import torch
sys.path.append('../../scripts')
from datasets_gen import CustomDataset
from simple_nn import SimpleNN, train, test, run_model
from federated_functions import average_model_weights

In [2]:
df = pd.read_csv("../../datasets/telemetry_analysis/processed/telemetry_analysis.csv", index_col=[0])

In [3]:
X = df.drop(columns= ['encoded_errors'])
Y = df['encoded_errors']

In [4]:
type(X), type(Y)

(pandas.core.frame.DataFrame, pandas.core.series.Series)

In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [6]:
x_train

Unnamed: 0,machineID,volt,rotate,pressure,vibration
832069,95,0.602489,0.622592,0.695032,0.506543
2508,1,0.659271,0.610608,0.553617,0.557782
796662,91,0.672887,0.623010,0.538234,0.494785
1411,1,0.630805,0.557530,0.539619,0.522884
261833,30,0.697842,0.622484,0.502924,0.649536
...,...,...,...,...,...
259178,30,0.753139,0.625635,0.497996,0.497549
365838,42,0.617446,0.707746,0.737744,0.470528
131932,16,0.740271,0.589553,0.590955,0.599203
671155,77,0.761218,0.851997,0.492759,0.581905


In [7]:
train = pd.concat([x_train, y_train], axis=1)

In [8]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 701122 entries, 832069 to 121958
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   machineID       701122 non-null  int64  
 1   volt            701122 non-null  float64
 2   rotate          701122 non-null  float64
 3   pressure        701122 non-null  float64
 4   vibration       701122 non-null  float64
 5   encoded_errors  701122 non-null  int64  
dtypes: float64(4), int64(2)
memory usage: 37.4 MB


In [9]:
# Create a list to hold the DataFrame parts
df_parts = []

# Define the ranges
ranges = [(1, 20), (21, 40), (41, 60), (61, 80), (81, 100)]

# Loop through the ranges and slice the DataFrame
for start, end in ranges:
    df_part = train[(train['machineID'] >= start) & (train['machineID'] <= end)]
    df_parts.append(df_part)

In [10]:
df_parts

[        machineID      volt    rotate  pressure  vibration  encoded_errors
 2508            1  0.659271  0.610608  0.553617   0.557782               1
 1411            1  0.630805  0.557530  0.539619   0.522884               1
 168657         20  0.577362  0.618472  0.559892   0.609290               1
 163016         19  0.616169  0.676592  0.563412   0.580447               1
 62264           8  0.703740  0.721674  0.567376   0.515681               1
 ...           ...       ...       ...       ...        ...             ...
 137337         16  0.672545  0.782248  0.570821   0.541290               1
 54886           7  0.759167  0.696788  0.547027   0.537574               1
 110268         13  0.644481  0.584087  0.492728   0.482480               1
 131932         16  0.740271  0.589553  0.590955   0.599203               1
 121958         14  0.745417  0.628050  0.485034   0.576390               1
 
 [140235 rows x 6 columns],
         machineID      volt    rotate  pressure  vibratio

In [11]:
df_processed =[]
for df in range(len(df_parts)):
    X = df_parts[df].drop(columns= ['machineID', 'encoded_errors'])
    X.shape
    Y = df_parts[df].encoded_errors
    print(X.shape, Y.shape)
    print(Y.iloc[0])
    train_dataset = CustomDataset(X, Y)


    df_processed.append(train_dataset)
x_test = x_test.drop(columns= ['machineID'])
test_dataset = CustomDataset(x_test, y_test)

(140235, 4) (140235,)
1
(140313, 4) (140313,)
1
(140261, 4) (140261,)
1
(140070, 4) (140070,)
1
(140243, 4) (140243,)
1


In [12]:
df_processed

[<datasets_gen.CustomDataset at 0x249f380a580>,
 <datasets_gen.CustomDataset at 0x249f381ff10>,
 <datasets_gen.CustomDataset at 0x249f79c6700>,
 <datasets_gen.CustomDataset at 0x24986419040>,
 <datasets_gen.CustomDataset at 0x249864197f0>]

In [13]:
input_size = X.shape[1]
hidden_size = 10
num_classes = 2

model_1 = model_2 =  model_3 = model_4 = model_5 = SimpleNN(input_size, hidden_size, num_classes)
models = [model_1, model_2, model_3, model_4, model_5]

training_weights = []

In [14]:
input_size

4

In [15]:
len(df_processed)

5

In [16]:
for i in range(len(df_processed)):
    print(f"Model {i+1}")
    run_model(epochs=5, lr=0.01, model=models[i], weights_list=training_weights, train_dataset=df_processed[i], test_dataset=test_dataset )
    torch.save(models[i].state_dict(), f"federated_models/federated_model_{i+1}.pt")

Model 1
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[2463.0169595119078, 0.9956298743160982, 0.9912788466306894, 0.9956298743160982, 0.993449596429198]
Model 2
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[2461.474209082313, 0.9956298743160982, 0.9912788466306894, 0.9956298743160982, 0.993449596429198]
Model 3
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[2493.964671781985, 0.9956298743160982, 0.9912788466306894, 0.9956298743160982, 0.993449596429198]
Model 4
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[2468.677680219058, 0.9956298743160982, 0.9912788466306894, 0.9956298743160982, 0.993449596429198]
Model 5
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
[2457.600589037291, 0.9956298743160982, 0.9912788466306894, 0.9956298743160982, 0.993449596429198]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
len(training_weights)

5

In [18]:
average_weights = average_model_weights(weight_list=training_weights)
new_model = SimpleNN(input_size, hidden_size, num_classes)
new_model.load_state_dict(average_weights)

metrics = test(new_model, DataLoader(test_dataset, batch_size=2, shuffle=False))
print(f"loss = {metrics[0]} ")
print(f"accuracy = {metrics[1]}")
print(f"precision = {metrics[2]} ")
print(f"recall = {metrics[3]}")
print(f"f1 = {metrics[4]} ")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


loss = 2457.6005894650007 
accuracy = 0.9956298743160982
precision = 0.9912788466306894 
recall = 0.9956298743160982
f1 = 0.993449596429198 
