In [None]:
!pip install flwr torch scikit-learn

In [None]:
import os
import pandas as pd
import numpy as np
import zipfile
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
from typing import Dict, List, Optional, Tuple
import flwr as fl   
from flwr.server.strategy import FedAvg
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.common import NDArrays, Parameters, Scalar, FitRes, EvaluateRes, ndarrays_to_parameters, parameters_to_ndarrays
from flwr.server import ServerConfig

In [None]:
# Pick the compute device once: CUDA when a GPU is visible, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import umap

# Load the training split (rows shuffled, index rebuilt) and the validation split
train_data = (
    pd.read_csv('/kaggle/input/UjiIndoorLoc/TrainingData.csv', header=0)
    .sample(frac=1)
    .reset_index(drop=True)
)
test_data = pd.read_csv('/kaggle/input/UjiIndoorLoc/ValidationData.csv', header=0)

# Device id per row, and the columns whose names start with "WAP"
device_ids = train_data['PHONEID']
features = train_data.filter(regex='^WAP')

# Standardize every WAP column before projecting
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# PCA down to 50 components first, then t-SNE to 2-D on the PCA output
pca = PCA(n_components=50)
features_pca = pca.fit_transform(features_scaled)
tsne = TSNE(n_components=2, perplexity=30, learning_rate=200, random_state=0)
features_tsne = tsne.fit_transform(features_pca)

# Embedding coordinates plus the device id of each row (aligned on the reset index)
tsne_df = pd.DataFrame(features_tsne, columns=['TSNE1', 'TSNE2']).assign(PHONEID=device_ids)

# Scatter of the embedding, one colour per device
plt.figure(figsize=(12, 8))
sns.scatterplot(data=tsne_df, x='TSNE1', y='TSNE2', hue='PHONEID', palette='tab20', legend='full')
plt.title('t-SNE Visualization of Non-IID Data by Device')
plt.xlabel('t-SNE Axis 1')
plt.ylabel('t-SNE Axis 2')
plt.legend(loc='best', bbox_to_anchor=(1.05, 1), title='Device ID')
plt.show()


In [None]:
# Rebuild the plotting frame from the already-computed embedding and device ids
tsne_df = pd.DataFrame(features_tsne, columns=['TSNE1', 'TSNE2']).assign(PHONEID=device_ids)

# Same scatter as before, with dataset-oriented title and axis labels
plt.figure(figsize=(12, 8))
sns.scatterplot(data=tsne_df, x='TSNE1', y='TSNE2', hue='PHONEID', palette='tab20', legend='full')
plt.title('t-SNE Visualization of UJIIndoorLoc Dataset by Device')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.legend(loc='best', bbox_to_anchor=(1.05, 1), title='Device ID')
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

# Load data (rows are kept in file order here, so the default index lines up with the embedding rows)
train_data = pd.read_csv('/kaggle/input/UjiIndoorLoc/TrainingData.csv', header=0)

# Extract features and device IDs
features = train_data.filter(regex='^WAP')  # Select only WAP columns
device_ids = train_data['PHONEID']  # Use 'PHONEID' for device identification

# Standardize features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Dimensionality Reduction with PCA before t-SNE (optional, but helps speed up t-SNE)
pca = PCA(n_components=50)  # Reduce to 50 components
features_pca = pca.fit_transform(features_scaled)

# Apply t-SNE
tsne = TSNE(n_components=2, perplexity=30, learning_rate=200, random_state=0)
features_tsne = tsne.fit_transform(features_pca)

# Create a DataFrame for plotting
tsne_df = pd.DataFrame(data=features_tsne, columns=['TSNE1', 'TSNE2'])
tsne_df['PHONEID'] = device_ids  # Add device IDs to the DataFrame

# Plotting
plt.figure(figsize=(14, 10))
sns.scatterplot(x='TSNE1', y='TSNE2', hue='PHONEID', palette='tab20', data=tsne_df, legend='full')
plt.title('t-SNE Visualization of Non-IID data by Device')
plt.xlabel('t-SNE Axis 1')
# The y axis shows the second embedding coordinate (TSNE2); it was previously mislabelled as "Axis 1"
plt.ylabel('t-SNE Axis 2')
plt.legend(loc='best', bbox_to_anchor=(1.05, 1), title='Device ID')
plt.show()


In [None]:
# Seed for the row shuffle so the labeled/unlabeled split is identical on every run
split_seed = 42

train_data = pd.read_csv('/kaggle/input/UjiIndoorLoc/TrainingData.csv', header=0)
test_data = pd.read_csv('/kaggle/input/UjiIndoorLoc/ValidationData.csv', header=0)
# Shuffle all rows (frac=1) and rebuild a clean 0..n-1 index
train_data = train_data.sample(frac=1, random_state=split_seed).reset_index(drop=True)

# Fraction of the shuffled training rows that keep their labels
labeled_ratio = 0.3

num_labeled = int(len(train_data) * labeled_ratio)

# First `num_labeled` shuffled rows form the labeled set; the remainder is treated as unlabeled
labeled_train_data = train_data.iloc[:num_labeled].copy()
unlabled_train_data = train_data.iloc[num_labeled:].copy()

In [None]:
# Show the first rows of the training frame, then of the validation frame
print(train_data.head(), test_data.head(), sep="\n")

In [None]:
def transform_rss(rss, min_rss=-104, alpha=np.e):
    """Map one RSS reading to a powered, normalised value.

    Args:
        rss: Signal-strength reading.
        min_rss: Lower bound of the accepted range (negative).
        alpha: Exponent applied to the normalised reading.

    Returns:
        0 when ``rss`` is below ``min_rss`` or above 0; otherwise
        ``((rss - min_rss) / -min_rss) ** alpha``.
    """
    outside_range = rss < min_rss or rss > 0
    if outside_range:
        return 0
    # Linear rescale: min_rss -> 0.0 and 0 -> 1.0
    normalised = (rss - min_rss) / -min_rss
    return normalised ** alpha

In [None]:
def preprocess_data(data):
    """Split a raw frame into transformed feature and class-label arrays.

    Args:
        data: DataFrame containing 'BUILDINGID' and 'FLOOR' columns; every column
            except the last nine is treated as a signal reading.

    Returns:
        ``(features, labels)`` as NumPy arrays. Features are the readings passed
        element-wise through ``transform_rss``; labels are ``BUILDINGID * 5 + FLOOR``.
    """
    signal_columns = data.iloc[:, :-9]  # everything but the trailing nine columns
    features = signal_columns.map(transform_rss)
    # Fold (building, floor) into a single class id
    labels = data['FLOOR'] + data['BUILDINGID'] * 5
    return features.to_numpy(), labels.to_numpy()

In [None]:
# PyTorch dataset wrapper around the feature / label arrays
class ujiindoor_dataset (Dataset):
    """Dataset of float32 feature rows with optional int64 labels.

    When ``labels`` is None, each item is ``(row, row)`` so the sample can serve
    as its own target.
    """

    @staticmethod
    def _as_tensor(values, dtype):
        """Return ``values`` as a tensor of ``dtype``; existing tensors are cloned and detached first."""
        if isinstance(values, torch.Tensor):
            return values.clone().detach().to(dtype)
        return torch.tensor(values, dtype=dtype)

    def __init__ (self, data,labels):
        self.data = self._as_tensor(data, torch.float32)
        # Labels are optional; when given they are stored as int64
        self.labels = None if labels is None else self._as_tensor(labels, torch.long)

    def __len__ (self):
        return len(self.data)

    def __getitem__(self, index):
        sample = self.data[index]
        target = sample if self.labels is None else self.labels[index]
        return sample, target

In [None]:
# Run the same preprocessing over the labeled, unlabeled and validation frames, in that order
(labeled_features, labeled_labels), (unlabeled_features, unlabeled_labels), (test_features, test_labels) = [
    preprocess_data(split) for split in (labeled_train_data, unlabled_train_data, test_data)
]

In [None]:
# Labeled split -> shuffled training loader used for pre-training
autoencoder_labeled_data = ujiindoor_dataset(data=labeled_features, labels=labeled_labels)
labeled_loader = DataLoader(dataset=autoencoder_labeled_data, shuffle=True, batch_size=32)

# Validation split -> fixed-order loader used for evaluation
autoencoder_test_data = ujiindoor_dataset(data=test_features, labels=test_labels)
test_loader = DataLoader(dataset=autoencoder_test_data, shuffle=False, batch_size=32)

In [None]:
class AutoencoderClassifier(nn.Module):
    """Autoencoder with a convolutional classifier head on the latent code.

    ``forward`` returns ``(decoded, logits)``: the sigmoid reconstruction of the
    input and the raw class scores computed from the encoded representation.

    Args:
        input_dim: Number of input features per sample.
        encoding_dim: Size of the latent code. Must be at least 64 because the
            three un-padded kernel-22 convolutions each shorten the code by 21.
        num_classes: Number of output classes.

    Raises:
        ValueError: If ``encoding_dim`` is too small for the convolution stack.
    """

    # Geometry of the classifier's wide convolutions (no padding, stride 1)
    _CONV_KERNEL = 22
    _NUM_WIDE_CONVS = 3

    def __init__(self, input_dim=520, encoding_dim=64, num_classes=15):
        super(AutoencoderClassifier, self).__init__()

        # Length left after the three wide convolutions; 1 for the default encoding_dim=64
        conv_out_len = encoding_dim - self._NUM_WIDE_CONVS * (self._CONV_KERNEL - 1)
        if conv_out_len < 1:
            raise ValueError(
                f"encoding_dim must be at least "
                f"{self._NUM_WIDE_CONVS * (self._CONV_KERNEL - 1) + 1}, got {encoding_dim}"
            )

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(128, encoding_dim),
            nn.ReLU(True),
            nn.Dropout(0.3)
        )

        # Decoder for Autoencoder (sigmoid output, so reconstructions lie in [0, 1])
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, 128),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(256, input_dim),
            nn.Sigmoid()
        )

        # Classifier. Despite the "depthwise" names these use groups=1, i.e. ordinary convolutions.
        self.depthwise_conv1 = nn.Conv1d(in_channels=1, out_channels=99, kernel_size=self._CONV_KERNEL, groups=1)
        self.pointwise_conv1 = nn.Conv1d(in_channels=99, out_channels=99, kernel_size=1)
        self.depthwise_conv2 = nn.Conv1d(in_channels=99, out_channels=66, kernel_size=self._CONV_KERNEL, groups=1)
        self.pointwise_conv2 = nn.Conv1d(in_channels=66, out_channels=66, kernel_size=1)
        self.depthwise_conv3 = nn.Conv1d(in_channels=66, out_channels=33, kernel_size=self._CONV_KERNEL, groups=1)
        self.pointwise_conv3 = nn.Conv1d(in_channels=33, out_channels=33, kernel_size=1)
        # Flattened conv output is 33 channels x conv_out_len positions (33 for the default code size)
        self.fc1 = nn.Linear(33 * conv_out_len, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)  # num_classes = number of unique (building, floor) combinations

    def forward(self, x):
        """Return ``(decoded, logits)`` for a batch ``x`` of shape ``(batch, input_dim)``."""
        encoded = self.encoder(x)

        # Decoder part (Autoencoder)
        decoded = self.decoder(encoded)

        # Classifier part
        x = encoded.unsqueeze(1)  # Add channel dimension for Conv1d
        x = self.depthwise_conv1(x)
        x = self.pointwise_conv1(x)
        x = self.depthwise_conv2(x)
        x = self.pointwise_conv2(x)
        x = self.depthwise_conv3(x)
        x = self.pointwise_conv3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return decoded, x

In [None]:
# Supervised pre-training on the labeled split: the reconstruction (MSE) and
# classification (cross-entropy) losses are summed and optimised jointly.
# Define loss functions
criterion_ae = nn.MSELoss()
criterion_cls = nn.CrossEntropyLoss()
weight_decay = 1e-4

# Initialize and train the model (no .to(device) call here: model and batches stay on the default device)
model = AutoencoderClassifier()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=weight_decay)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    # Running sums of the per-batch losses for this epoch
    epoch_loss_ae = 0
    epoch_loss_cls = 0
    for inputs, labels in labeled_loader:
        optimizer.zero_grad()
        decoded, outputs = model(inputs)
        loss_ae = criterion_ae(decoded, inputs)
        loss_cls = criterion_cls(outputs, labels.long()) 
        # Unweighted sum of the two objectives
        loss = loss_ae + loss_cls
        loss.backward()
        optimizer.step()
        epoch_loss_ae += loss_ae.item()
        epoch_loss_cls += loss_cls.item()
    # Averages are per batch (divided by the number of batches, not samples)
    avg_loss_ae = epoch_loss_ae / len(labeled_loader)
    avg_loss_cls = epoch_loss_cls / len(labeled_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Autoencoder Loss: {avg_loss_ae:.4f}, Classifier Loss: {avg_loss_cls:.4f}")

# Save the pre-trained model (this file is reloaded below for evaluation and for the federated clients)
torch.save(model.state_dict(), 'pretrained_autoencoder_classifier2.pth')

In [None]:
# Evaluate the pre-trained checkpoint on the validation loader.
model = AutoencoderClassifier()
model.load_state_dict(torch.load('pretrained_autoencoder_classifier2.pth'))
model.eval()  # Set model to evaluation mode

# Define loss functions
criterion_ae = nn.MSELoss()
criterion_cls = nn.CrossEntropyLoss()

# `test_loader` is expected to exist already (it is built from the validation split
# in the loader cell above).

# Accumulators: summed per-batch losses, correct predictions and samples seen
test_loss_ae = 0
test_loss_cls = 0
correct_predictions = 0
total_samples = 0

with torch.no_grad():  # Disable gradient computation for testing
    for inputs, labels in test_loader:
        decoded, outputs = model(inputs)
        
        # Calculate autoencoder loss
        loss_ae = criterion_ae(decoded, inputs)
        test_loss_ae += loss_ae.item()
        
        # Calculate classifier loss
        loss_cls = criterion_cls(outputs, labels.long())  # Convert labels to Long type
        test_loss_cls += loss_cls.item()
        
        # Calculate accuracy (predicted class = index of the largest logit)
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

# Calculate average losses (per batch)
avg_test_loss_ae = test_loss_ae / len(test_loader)
avg_test_loss_cls = test_loss_cls / len(test_loader)

# Calculate accuracy as a fraction in [0, 1]
accuracy = correct_predictions / total_samples

print(f"Test Autoencoder Loss: {avg_test_loss_ae:.4f}")
print(f"Test Classifier Loss: {avg_test_loss_cls:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
class ClassifierClient(fl.client.NumPyClient):
    """Flower client that self-trains the network on unlabeled data with pseudo-labels.

    Args:
        cid: Client identifier, used only in log messages.
        net: Model returning ``(decoded, logits)``; it is moved to the global ``device``.
        train_loader: Loader whose batches are ``(inputs, _)``; the second element is ignored.
        val_loader: Loader of ``(inputs, labels)`` batches used by ``evaluate``.
    """

    def __init__(self, cid, net, train_loader, val_loader):
        self.cid = cid
        self.net = net.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader

    def get_parameters(self, config):
        """Return the model parameters as NumPy arrays, in ``net.parameters()`` order."""
        return [val.cpu().detach().numpy() for val in self.net.parameters()]

    def set_parameters(self, parameters):
        """Overwrite the model parameters from arrays given in ``net.parameters()`` order."""
        for val, param in zip(parameters, self.net.parameters()):
            param.data = torch.tensor(val, dtype=torch.float32).to(device)

    def generate_pseudo_labels(self, inputs):
        """Return the arg-max class of the current model for each row of ``inputs``.

        Inference runs in eval mode without gradients; the train/eval mode the
        network had on entry is restored afterwards so a surrounding training
        loop keeps dropout active.
        """
        was_training = self.net.training
        self.net.eval()
        try:
            with torch.no_grad():
                _, outputs = self.net(inputs)
                _, pseudo_labels = torch.max(outputs, 1)
        finally:
            self.net.train(was_training)
        return pseudo_labels

    def fit(self, parameters, config):
        """Train locally for three epochs and return ``(parameters, num_examples, {})``."""
        self.set_parameters(parameters)
        self.net.train()
        criterion_ae = nn.MSELoss()
        criterion_cls = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.net.parameters(), lr=0.0001)  # Reduced learning rate
        alpha = 1.0  # Scaling factor for classifier loss
        num_batches = len(self.train_loader)

        for epoch in range(3):  # Adjust the number of epochs if needed
            epoch_loss = 0.0    # sum over the whole epoch (for the end-of-epoch report)
            running_loss = 0.0  # sum over the current window of 10 batches
            for i, (inputs, _) in enumerate(self.train_loader):
                inputs = inputs.to(device)

                # Generate pseudo-labels for the unlabeled data
                pseudo_labels = self.generate_pseudo_labels(inputs)

                optimizer.zero_grad()
                decoded, out = self.net(inputs)

                # Reconstruction loss plus scaled classification loss against the pseudo-labels
                loss_ae = criterion_ae(decoded, inputs)
                loss_cls = criterion_cls(out, pseudo_labels)
                loss = loss_ae + alpha * loss_cls

                loss.backward()

                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)

                optimizer.step()

                batch_loss = loss.item()
                epoch_loss += batch_loss
                running_loss += batch_loss

                if i % 10 == 9:  # Print every 10 mini-batches
                    print(f"[Client {self.cid}, Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.4f}")
                    running_loss = 0.0

            # Mean per-batch loss over the full epoch (guarded against an empty loader)
            print(f"Client {self.cid}, Epoch {epoch + 1}, Loss: {epoch_loss / max(num_batches, 1)}")

        return self.get_parameters(config), len(self.train_loader.dataset), {}

    def evaluate(self, parameters, config):
        """Return ``(mean per-sample loss, num_examples, {"accuracy": percent})`` on the validation loader."""
        self.set_parameters(parameters)
        self.net.eval()
        criterion_cls = nn.CrossEntropyLoss()

        loss_sum = 0.0
        correct_cls = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in self.val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                _, out = self.net(inputs)

                batch_size = labels.size(0)
                # The criterion returns a batch mean; weight it by the batch size so
                # dividing by `total` below yields a true per-sample mean.
                loss_sum += criterion_cls(out, labels).item() * batch_size

                _, predicted_cls = torch.max(out.data, 1)

                total += batch_size
                correct_cls += (predicted_cls == labels).sum().item()

        if total == 0:
            # Nothing to evaluate: report neutral values instead of dividing by zero
            return 0.0, 0, {"accuracy": 0.0}

        loss = loss_sum / total
        accuracy_cls = 100 * correct_cls / total

        return loss, len(self.val_loader.dataset), {"accuracy": accuracy_cls}


In [None]:
# Roster of federated clients (filled in by the next cell) and how many to create
clients = list()
num_clients = 10
# Cut the unlabeled feature matrix row-wise into consecutive shards, one per client
unlabeled_client_datasets = torch.chunk(
    torch.tensor(unlabeled_features, dtype=torch.float32),
    chunks=num_clients,
)

In [None]:
# Every client starts from the same pre-trained checkpoint
pretrained_weights = torch.load('pretrained_autoencoder_classifier2.pth')

# Start from an empty roster so re-running this cell does not append duplicate clients
clients = []

print(f"Number of chunks: {len(unlabeled_client_datasets)}")    
for cid, client_datasets in enumerate(unlabeled_client_datasets):
    print(f"Chunk {cid} size: {client_datasets.size(0)}")
    client_net = AutoencoderClassifier()
    client_net.load_state_dict(pretrained_weights)
    # Unlabeled shard: labels=None makes the dataset return (x, x) pairs
    client_data = DataLoader(ujiindoor_dataset(client_datasets, None), batch_size=32, shuffle=True)
    print(f"Training DataLoader for client {cid} has {len(client_data.dataset)} samples")
    print(client_data.dataset)

    # All clients are evaluated on the same validation dataset
    test_loader = DataLoader(autoencoder_test_data, batch_size=32, shuffle=False)
  
    client = ClassifierClient(cid, client_net, client_data, test_loader) #during evaluation this might cause issues
    clients.append(client)

In [None]:
class SimilarityAggregationStrategy(FedAvg):
    """FedAvg variant that groups clients by update similarity after a warm-up.

    For the first ``initial_rounds`` aggregations the client models are averaged
    layer by layer. Afterwards each client is averaged with its most similar peers
    and the resulting per-client models are averaged into the global model.

    Similarity between two clients is
    ``gamma * cos(update_i, update_j) + (1 - gamma) * cos(acc_i, acc_j)``, where
    ``update`` is the client's weights minus the global weights it was sent this
    round and ``acc`` is the sum of that client's updates over all rounds so far.

    Args:
        initial_rounds: Number of plain-averaging rounds before similarity is used.
        gamma: Weight of the current-round term in the similarity score.
        similarity_threshold: Minimum similarity for a peer to join a client's group.
        num_similar_clients: Maximum number of peers per group.
        save_round: Aggregation count after which the global model is written to disk.
        save_path: Destination of the saved ``state_dict``.
    """

    def __init__(self, initial_rounds: int = 2, gamma: float = 0.5, similarity_threshold: float = 0.5,
                 num_similar_clients: int = 4, save_round: int = 5,
                 save_path: str = "/kaggle/working/final_aggregated_model.pth"):
        super().__init__()
        self.initial_rounds = initial_rounds
        self.gamma = gamma
        self.similarity_threshold = similarity_threshold
        self.num_similar_clients = num_similar_clients
        self.save_round = save_round
        self.save_path = save_path
        # Client id -> flattened sum of that client's updates over all rounds seen so far
        self.client_histories = {}
        self.global_round = 0
        # Global weights most recently handed to the clients (captured in configure_fit)
        self._reference_weights: Optional[List[np.ndarray]] = None
        # Flattened reference used for the round currently being aggregated
        self._round_reference_flat: Optional[np.ndarray] = None

    def configure_fit(self, server_round: int, parameters: Parameters, client_manager: ClientManager):
        """Remember the weights sent out this round, then defer to FedAvg's client selection."""
        self._reference_weights = parameters_to_ndarrays(parameters)
        return super().configure_fit(server_round, parameters, client_manager)

    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[ClientProxy, FitRes]],
        failures: List[BaseException],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Aggregate client results into new global parameters; ``(None, {})`` when there are none."""
        if not results:
            return None, {}

        # Convert Parameters to list of NumPy arrays
        weights = [parameters_to_ndarrays(r[1].parameters) for r in results]

        # Reference for this round's updates: the weights that were sent out, or the
        # plain average of the returned models when configure_fit was never called.
        reference = self._reference_weights if self._reference_weights is not None else self.fedavg(weights)
        self._round_reference_flat = self._flatten(reference)

        # Extend every participating client's accumulated update (also during warm-up)
        for (proxy, _), client_weights in zip(results, weights):
            update = self._flat_update(client_weights)
            previous = self.client_histories.get(proxy.cid)
            self.client_histories[proxy.cid] = update if previous is None else previous + update

        if self.global_round < self.initial_rounds:
            aggregated_weights = self.fedavg(weights)
        else:
            aggregated_weights = self.similarity_aggregate(weights, results)

        self.global_round += 1

        # Save the aggregated weights once the configured round is reached
        if self.global_round == self.save_round:
            print("Saving final aggregated weights...")
            self.save_model(aggregated_weights, self.save_path)

        return ndarrays_to_parameters(aggregated_weights), {}

    def fedavg(self, weights: List[NDArrays]) -> List[np.ndarray]:
        """Unweighted layer-by-layer mean of the client models."""
        avg_weights = [np.mean([w[i] for w in weights], axis=0) for i in range(len(weights[0]))]
        return avg_weights

    def similarity_aggregate(self, weights: List[NDArrays], results: List[Tuple[ClientProxy, FitRes]]) -> List[np.ndarray]:
        """Average each client with its similar peers, then average those per-client models."""
        similarities = self.calculate_similarities(weights, results)
        num_layers = len(weights[0])
        aggregated_weights = []
        for i in range(len(results)):
            row = np.asarray(similarities[i], dtype=float)
            # Other clients above the threshold, most similar first, capped at num_similar_clients
            ranked = np.argsort(row)[::-1]
            similar_clients = [j for j in ranked if j != i and row[j] > self.similarity_threshold]
            similar_clients = similar_clients[: self.num_similar_clients]
            # The client itself is counted exactly once in its own group
            group = [weights[i]] + [weights[j] for j in similar_clients]
            # Layers have different shapes, so the mean is taken one layer at a time
            aggregated_weights.append(
                [np.mean([member[k] for member in group], axis=0) for k in range(num_layers)]
            )
        final_aggregated_weights = [np.mean([w[k] for w in aggregated_weights], axis=0) for k in range(num_layers)]
        return final_aggregated_weights

    def calculate_similarities(self, weights: List[NDArrays], results: List[Tuple[ClientProxy, FitRes]]) -> List[List[float]]:
        """Return the full pairwise similarity matrix; the diagonal is fixed at 1.0."""
        cids = [proxy.cid for proxy, _ in results]
        similarities = []
        for i in range(len(results)):
            client_similarities = []
            for j in range(len(results)):
                if i == j:
                    client_similarities.append(1.0)
                else:
                    sim = self.calculate_similarity(weights[i], weights[j], cids[i], cids[j])
                    client_similarities.append(sim)
            similarities.append(client_similarities)
        return similarities

    def calculate_similarity(self, weight_i: NDArrays, weight_j: NDArrays, cid_i=None, cid_j=None) -> float:
        """Blend current-round and accumulated cosine similarity of two clients' updates.

        ``cid_i`` / ``cid_j`` select the accumulated updates in ``client_histories``;
        when a client has no history its current update is used for both terms.
        """
        grad_i = self._flat_update(weight_i)
        grad_j = self._flat_update(weight_j)
        acc_grad_i = self.client_histories.get(cid_i, grad_i)
        acc_grad_j = self.client_histories.get(cid_j, grad_j)

        similarity_term_1 = self._cosine(grad_i, grad_j)
        similarity_term_2 = self._cosine(acc_grad_i, acc_grad_j)

        similarity = self.gamma * similarity_term_1 + (1 - self.gamma) * similarity_term_2
        return similarity

    @staticmethod
    def _flatten(arrays: NDArrays) -> np.ndarray:
        """Concatenate all layers into one float64 vector."""
        return np.concatenate([np.asarray(a, dtype=np.float64).ravel() for a in arrays])

    def _flat_update(self, weights: NDArrays) -> np.ndarray:
        """Flattened ``weights`` minus the round reference (or the raw weights when no usable reference exists)."""
        flat = self._flatten(weights)
        reference = self._round_reference_flat
        if reference is None or reference.shape != flat.shape:
            return flat
        return flat - reference

    @staticmethod
    def _cosine(a: np.ndarray, b: np.ndarray) -> float:
        """Cosine similarity of two vectors; 0.0 when either has zero norm."""
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return float(np.dot(a, b) / denominator)

    def save_model(self, weights: List[np.ndarray], filename: str):
        """Write ``weights`` to ``filename`` as an ``AutoencoderClassifier`` state_dict.

        Arrays are matched to state_dict entries by position, so ``weights`` must be
        in the same order as the model's ``state_dict()``.
        """
        # Create a dummy model to generate the correct state_dict
        dummy_model = AutoencoderClassifier()
        state_dict = dummy_model.state_dict()

        # Map the list of weights to the state_dict
        for key, weight in zip(list(state_dict.keys()), weights):
            state_dict[key] = torch.tensor(weight)

        # Save the state_dict
        torch.save(state_dict, filename)


In [None]:
def client_fn(cid) -> fl.client.Client:
    """Return the pre-built client stored at position ``int(cid)`` of the global ``clients`` list."""
    client_index = int(cid)
    return clients[client_index]

In [None]:
strategy = SimilarityAggregationStrategy()

# Run five federated rounds over all clients. A GPU is reserved per client only
# when CUDA is actually available; on a CPU-only machine the request is 0 so the
# simulation's resource requirements can still be satisfied.
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    strategy=strategy,
    config=ServerConfig(num_rounds=5),
    client_resources={'num_gpus': 1 if torch.cuda.is_available() else 0, 'num_cpus': 1},
)

In [None]:
# Evaluate the federated global model on the validation loader.
# Prefer the checkpoint the strategy writes during this run; fall back to the
# previously uploaded copy when no fresh file exists.
fresh_checkpoint = '/kaggle/working/final_aggregated_model.pth'
uploaded_checkpoint = '/kaggle/input/fd/other/default/1/final_aggregated_model.pth'
checkpoint_path = fresh_checkpoint if os.path.exists(fresh_checkpoint) else uploaded_checkpoint
print(f"Loading global model from {checkpoint_path}")

global_model = AutoencoderClassifier()
global_model.load_state_dict(torch.load(checkpoint_path))
global_model.eval()  # Set model to evaluation mode

# Define loss functions
criterion_ae = nn.MSELoss()
criterion_cls = nn.CrossEntropyLoss()

# Accumulators: summed per-batch losses, correct predictions and samples seen
test_loss_ae = 0
test_loss_cls = 0
correct_predictions = 0
total_samples = 0

with torch.no_grad():  # Disable gradient computation for testing
    for inputs, labels in test_loader:
        decoded, outputs = global_model(inputs)

        # Calculate autoencoder loss
        loss_ae = criterion_ae(decoded, inputs)
        test_loss_ae += loss_ae.item()

        # Calculate classifier loss
        loss_cls = criterion_cls(outputs, labels.long())  # Convert labels to Long type
        test_loss_cls += loss_cls.item()

        # Calculate accuracy (predicted class = index of the largest logit)
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

# Calculate average losses (per batch)
avg_test_loss_ae = test_loss_ae / len(test_loader)
avg_test_loss_cls = test_loss_cls / len(test_loader)

# Calculate accuracy as a fraction in [0, 1]
federated_accuracy = correct_predictions / total_samples

print(f"Test Autoencoder Loss: {avg_test_loss_ae:.4f}")
print(f"Test Classifier Loss: {avg_test_loss_cls:.4f}")
print(f"Test Accuracy: {federated_accuracy:.4f}")

In [None]:
# Fixed-order loader over the validation dataset
testy = DataLoader(autoencoder_test_data, shuffle=False, batch_size=32)

# Collect the global model's predicted class and the true class for every sample
all_predictions = []
all_labels = []

with torch.no_grad():
    for inputs, labels in testy:
        _, outputs = global_model(inputs)
        _, predicted = torch.max(outputs, 1)

        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# One line per sample: prediction next to ground truth
for predicted_label, true_label in zip(all_predictions, all_labels):
    print(f"Predicted: {predicted_label}, True Label: {true_label}")


In [None]:
# Centralized baseline, stage 1: train a fresh model on the unlabeled split using
# only the reconstruction loss. The classifier output is discarded in this loop,
# so only the encoder/decoder path receives gradients.
center = AutoencoderClassifier()
criterion_ae = nn.MSELoss()
optimizer = optim.Adam(center.parameters(), lr=0.001, weight_decay=1e-4)

# Create DataLoader for unlabeled data (labels=None makes the dataset yield (x, x) pairs)
unlabeled_dataset = ujiindoor_dataset(unlabeled_features, None)
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=32, shuffle=True)

# Training loop for the autoencoder part only
num_epochs = 10
for epoch in range(num_epochs):
    center.train()
    epoch_loss_ae = 0
    for inputs, _ in unlabeled_loader:
        optimizer.zero_grad()
        decoded, _ = center(inputs)
        loss_ae = criterion_ae(decoded, inputs)
        loss_ae.backward()
        optimizer.step()
        epoch_loss_ae += loss_ae.item()
    # Per-batch average of the reconstruction loss
    avg_loss_ae = epoch_loss_ae / len(unlabeled_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Autoencoder Loss: {avg_loss_ae:.4f}")

# Save the autoencoder model
torch.save(center.state_dict(), 'autoencoder_classifier_unlabeled2.pth')


In [None]:
# Load the trained autoencoder model
center.load_state_dict(torch.load('autoencoder_classifier_unlabeled2.pth'))
center.eval()

# Predict in dataset order. The training loader shuffles, which would pair
# pseudo_labels[k] with a different row than unlabeled_features[k].
ordered_unlabeled_loader = DataLoader(unlabeled_loader.dataset, batch_size=32, shuffle=False)

# NOTE(review): the preceding training loop optimises only the reconstruction loss,
# so the classifier head producing these labels has not been trained at this point — confirm this is intended.
pseudo_labels = []
with torch.no_grad():
    for inputs, _ in ordered_unlabeled_loader:
        _, outputs = center(inputs)
        _, predicted = torch.max(outputs, 1)
        pseudo_labels.extend(predicted.cpu().numpy())

pseudo_labels = np.array(pseudo_labels)
# One pseudo-label per unlabeled row, in the same order as unlabeled_features
assert len(pseudo_labels) == len(unlabeled_features)

# Create a new dataset with pseudo-labels
pseudo_labeled_dataset = ujiindoor_dataset(unlabeled_features, pseudo_labels)
pseudo_labeled_loader = DataLoader(pseudo_labeled_dataset, batch_size=32, shuffle=True)


In [None]:
# Combine labeled and pseudo-labeled data (features and labels are concatenated in the same order)
combined_loader = DataLoader(
    ujiindoor_dataset(np.concatenate([labeled_features, unlabeled_features]), 
                     np.concatenate([labeled_labels, pseudo_labels])),
    batch_size=32, shuffle=True
)

# Define the classification loss here so this cell does not depend on an earlier cell's global
criterion_cls = nn.CrossEntropyLoss()

# Reinitialize the optimizer for the combined training
optimizer = optim.Adam(center.parameters(), lr=0.001, weight_decay=1e-4)

# Training loop for both labeled and pseudo-labeled data
num_epochs = 10
for epoch in range(num_epochs):
    center.train()
    epoch_loss_ae = 0
    epoch_loss_cls = 0
    for inputs, labels in combined_loader:
        optimizer.zero_grad()
        decoded, outputs = center(inputs)
        loss_ae = criterion_ae(decoded, inputs)
        loss_cls = criterion_cls(outputs, labels.long())
        # Unweighted sum of reconstruction and classification losses
        loss = loss_ae + loss_cls
        loss.backward()
        optimizer.step()
        epoch_loss_ae += loss_ae.item()
        epoch_loss_cls += loss_cls.item()
    # Per-batch averages
    avg_loss_ae = epoch_loss_ae / len(combined_loader)
    avg_loss_cls = epoch_loss_cls / len(combined_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Autoencoder Loss: {avg_loss_ae:.4f}, Classifier Loss: {avg_loss_cls:.4f}")

# Save the combined trained model: `center` is the network trained above
# (saving `model` would write the earlier pre-trained network instead).
torch.save(center.state_dict(), 'autoencoder_classifier_combined3.pth')

In [None]:
# Centralized baseline, evaluation: load the combined-training checkpoint into a
# fresh model and score it on the validation split.
# Load the combined trained model
zz = AutoencoderClassifier()
zz.load_state_dict(torch.load('autoencoder_classifier_combined3.pth'))
zz.eval()  # Set model to evaluation mode

# Create DataLoader for test data (fixed order)
test_dataset = ujiindoor_dataset(test_features, test_labels)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluation loop for the autoencoder and classifier
# (criterion_ae and criterion_cls are the loss objects defined in earlier cells)
test_loss_ae = 0
test_loss_cls = 0
correct_predictions = 0
total_samples = 0

with torch.no_grad():  # Disable gradient computation for testing
    for inputs, labels in test_loader:
        decoded, outputs = zz(inputs)
        
        # Calculate autoencoder loss
        loss_ae = criterion_ae(decoded, inputs)
        test_loss_ae += loss_ae.item()
        
        # Calculate classifier loss
        loss_cls = criterion_cls(outputs, labels.long())
        test_loss_cls += loss_cls.item()
        
        # Calculate accuracy (predicted class = index of the largest logit)
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

# Calculate average losses (per batch)
avg_test_loss_ae = test_loss_ae / len(test_loader)
avg_test_loss_cls = test_loss_cls / len(test_loader)

# Calculate accuracy as a fraction in [0, 1]; used by the comparison plot below
centralized_accuracy = correct_predictions / total_samples

print(f"Test Autoencoder Loss: {avg_test_loss_ae:.4f}")
print(f"Test Classifier Loss: {avg_test_loss_cls:.4f}")
print(f"Test Accuracy: {centralized_accuracy:.4f}")


In [None]:
import matplotlib.pyplot as plt

# The two accuracy fractions to compare, with their bar labels
accuracies = [federated_accuracy, centralized_accuracy]
labels = ['Federated Learning with Similarity Strategy', 'Centralized Learning']

# One bar per approach
plt.figure(figsize=(10, 6))
plt.bar(labels, accuracies, color=['blue', 'green'])

# Titles, axis labels and a 0..1 accuracy scale
plt.title('Accuracy Comparison: Federated vs Centralized Learning (NON-IID)')
plt.xlabel('Learning Approach (100 Epochs)')
plt.ylabel('Accuracy')
plt.ylim(0, 1)  # Assuming accuracy is between 0 and 1
plt.grid(True)

# Percentage annotation slightly above each bar
for bar_position, bar_height in enumerate(accuracies):
    plt.text(bar_position, bar_height + 0.02, f"{bar_height:.2%}", ha='center', fontweight='bold')

# Write the figure to disk before showing it
plt.savefig('accuracy_comparison.png')  
plt.show()
